diff --git a/.github/workflows/ci-docs.yaml b/.github/workflows/ci-docs.yaml index fd9b16ed2..4b13d9ef9 100644 --- a/.github/workflows/ci-docs.yaml +++ b/.github/workflows/ci-docs.yaml @@ -21,6 +21,8 @@ jobs: run: | python -m pip install ".[all]" - uses: quarto-dev/quarto-actions/setup@v2 + with: + tinytex: true - name: Build docs run: | make docs-build diff --git a/.gitignore b/.gitignore index 20f5bf690..8b8f53c5d 100644 --- a/.gitignore +++ b/.gitignore @@ -107,10 +107,14 @@ docs/source/generated/ docs/source/reference/ # Playground Scripts and temporary outputs +.DS_Store playground*.py playground*.qmd playground.html playground_files manual-testing.py manual-testing.ipynb -.DS_Store +latex_testing.qmd +latex_testing.pdf +latex_examples.qmd +latex_examples.pdf diff --git a/docs/_quarto.yml b/docs/_quarto.yml index a0ca0dd64..7dd56f3b3 100644 --- a/docs/_quarto.yml +++ b/docs/_quarto.yml @@ -214,6 +214,7 @@ quartodoc: - GT.save - GT.show - GT.as_raw_html + - GT.as_latex - title: Value formatting functions desc: > If you have single values (or lists of them) in need of formatting, we have a set of diff --git a/docs/a-latex_examples/index.qmd b/docs/a-latex_examples/index.qmd new file mode 100644 index 000000000..503c7b22b --- /dev/null +++ b/docs/a-latex_examples/index.qmd @@ -0,0 +1,498 @@ +--- +format: pdf +--- + +```{python} +#| label: setup +#| include: false + +import polars as pl +from great_tables import GT +from great_tables.data import gtcars, islands, airquality, constants, exibble +``` + +```{python} +# | output: asis +# | echo: false + +gtcars_pl = ( + pl.from_pandas(gtcars) + .select(["mfr", "model", "hp", "trq", "mpg_c", "msrp"]) +) + +print( + GT( + gtcars[["mfr", "model", "hp", "trq", "msrp"]].head(5), + ) + .tab_header( + title="Some Cars from the gtcars Dataset", + subtitle="Five Cars are shown here" + ) + .tab_spanner( + label="Make and Model", + columns=["mfr", "model"], + id="make_model" + ) + .tab_spanner( + label="Performance", + columns=["hp", "trq", "msrp"] + ) + .tab_spanner( + label="Everything but the cost", + columns=["mfr", "model", "hp", "trq"] + ) + .fmt_integer(columns=["hp", "trq"]) + .fmt_currency(columns="msrp") + .tab_source_note("Cars are all 2015 models.") + .tab_source_note("Horsepower and Torque values are estimates.") + .tab_options(table_width="600pt") + .as_latex() +) +``` + + +```{python} +# | output: asis +# | echo: false + +islands_mini = islands.head(10) + +print( + ( + GT(islands_mini) + .tab_header( + title="Large Landmasses of the World", + subtitle="The top ten largest are presented" + ) + .fmt_integer(columns="size") + .tab_source_note( + source_note="Source: The World Almanac and Book of Facts, 1975, page 406." + ) + .tab_source_note( + source_note="Reference: McNeil, D. R. (1977) *Interactive Data Analysis*. Wiley." 
+ ) + ).as_latex() +) +``` + + +```{python} +# | output: asis +# | echo: false + +airquality_mini = airquality.head(10).assign(Year = 1973) + +print( + ( + GT(airquality_mini) + .tab_header( + title="New York Air Quality Measurements", + subtitle="Daily measurements in New York City (May 1-10, 1973)" + ) + ).as_latex() +) +``` + + +```{python} +# | output: asis +# | echo: false + +constants_mini = ( + pl.from_pandas(constants) + .filter(pl.col("name").str.contains("molar")).sort("value") + .with_columns( + name=pl.col("name") + .str.to_titlecase() + .str.replace("Kpa", "kpa") + .str.replace("Of", "of") + ) +) + +print( + GT(constants_mini) + .cols_hide(columns=["uncert", "sf_value", "sf_uncert", "units"]) + .fmt_scientific(columns="value", decimals=3) + .tab_header(title="Physical Constants Having a Molar Basis") + .tab_options(column_labels_hidden=True) + .as_latex() +) +``` + + +```{python} +# | output: asis +# | echo: false + +exibble_mini = ( + pl.from_pandas(exibble) + .select(["num", "date", "time", "currency"]) + .head(5) + .with_columns(time=pl.concat_str("[" + pl.col("time") + "]")) + .with_columns(num=pl.col("num") * 1000) +) + +print( + GT(exibble_mini) + .fmt_bytes(columns="num", standard="binary") + .fmt_currency(columns="currency", rows=[2, 3, 4], pattern="_{x}_") + .fmt_date(columns="date", date_style="wday_month_day_year") + .as_latex() +) +``` + + +```{python} + +import polars as pl +from great_tables import GT, md, html +from great_tables.data import islands + +islands_mini = pl.from_pandas(islands).sort("size", descending=True).head(10) + +print( + GT(islands_mini) + .tab_header(title="Large Landmasses of the World", subtitle="The top ten largest are presented") + # .tab_stub(rowname_col="name") + .tab_source_note(source_note="Source: The World Almanac and Book of Facts, 1975, page 406.") + .tab_source_note( + # source_note=md("Reference: McNeil, D. R. (1977) *Interactive Data Analysis*. Wiley.") + source_note=html("Reference: McNeil, D. R. (1977) *Interactive Data Analysis*. Wiley.$") + ) + .tab_stubhead(label="landmass") + .fmt_image(columns="size") + .as_latex() +) +``` + +```{python} +from great_tables import GT, html +from great_tables.data import airquality + +airquality_mini = airquality.head(10).assign(Year=1973) + +print( + GT(airquality_mini) + .tab_header( + title="New York Air Quality Measurements", + subtitle="Daily measurements in New York City (May 1-10, 1973)", + ) + .tab_spanner(label="Time", columns=["Year", "Month", "Day"]) + .tab_spanner(label="Measurement", columns=["Ozone", "Solar_R", "Wind", "Temp"]) + .cols_move_to_start(columns=["Year", "Month", "Day"]) + .cols_label( + Ozone=html("Ozone,
<br>ppbV"),
+        Solar_R=html("Solar R.,<br>cal/m2"),
+        Wind=html("Wind,<br>mph"),
+        Temp=html("Temp,<br>
°F"), + ) + .as_latex() +) +``` + +```{python} +#| eval: false + +from great_tables import GT +from great_tables.data import countrypops +import polars as pl +import polars.selectors as cs + +# Get vectors of 2-letter country codes for each region of Oceania +oceania = { + "Australasia": ["AU", "NZ"], + "Melanesia": ["NC", "PG", "SB", "VU"], + "Micronesia": ["FM", "GU", "KI", "MH", "MP", "NR", "PW"], + "Polynesia": ["PF", "WS", "TO", "TV"], +} + +# Create a dictionary mapping country to region (e.g. AU -> Australasia) +country_to_region = { + country: region for region, countries in oceania.items() for country in countries +} + +wide_pops = ( + pl.from_pandas(countrypops) + .filter( + pl.col("country_code_2").is_in(list(country_to_region)) + & pl.col("year").is_in([2000, 2010, 2020]) + ) + .with_columns(pl.col("country_code_2").replace(country_to_region).alias("region")) + .pivot(index=["country_name", "region"], on="year", values="population") + .sort("2020", descending=True) +) + +print( + GT(wide_pops) + .tab_header(title="Populations of Oceania's Countries in 2000, 2010, and 2020") + .tab_spanner(label="Total Population", columns=cs.all()) + #.tab_stub(rowname_col="country_name", groupname_col="region") + .fmt_integer() # example fails because of this method + .as_latex() +) +``` + + +towny example + +```{python} +# | echo: false +# | eval: false + +from great_tables import GT, html +from great_tables.data import towny + +towny_mini = ( + towny[["name", "website", "density_2021", "land_area_km2", "latitude", "longitude"]] + .sort_values("density_2021", ascending=False) + .head(10) +) + +towny_mini["url_name"] = ["["] + towny_mini["name"] + ["]"] + ["("] + towny_mini["website"] + [")"] + +towny_mini["location"] = ( + ["[map](http://maps.google.com/?ie=UTF8&hq=&ll="] + + towny_mini["latitude"].astype(str) + + [","] + + towny_mini["longitude"].astype(str) + + ["&z=13)"] +) + +print( + GT( + towny_mini[["url_name", "location", "land_area_km2", "density_2021"]], + # rowname_col="url_name", + ) + .tab_header( + title="The Municipalities of Ontario", + subtitle="The top 10 highest population density in 2021", + ) + .tab_stubhead(label="Municipality") + .fmt_markdown( + columns=["url_name", "location"] + ) # fmt_markdown() in LaTeX context doesn't Latex escape chars + .fmt_number(columns=["land_area_km2", "density_2021"], decimals=1) + .cols_label( + land_area_km2=html("land area,
<br>km2"),
+        density_2021=html("density,<br>
people/km2"), + ) + .as_latex() +) +``` + +```{python} +from great_tables import GT, html +from great_tables.data import sza +import polars as pl +import polars.selectors as cs + +sza_pivot = ( + pl.from_pandas(sza) + .filter((pl.col("latitude") == "20") & (pl.col("tst") <= "1200")) + .select(pl.col("*").exclude("latitude")) + .drop_nulls() + .pivot(values="sza", index="month", on="tst", sort_columns=True) +) + +print( + GT( + sza_pivot, + #rowname_col="month" + ) + .data_color( + domain=[90, 0], + palette=["rebeccapurple", "white", "orange"], + na_color="white", + ) + .tab_header( + title="Solar Zenith Angles from 05:30 to 12:00", + subtitle=html("Average monthly values at latitude of 20°N."), + ) + .sub_missing(missing_text="") + .as_latex() +) +``` + + + + +```{python} + + +# | echo: false +from great_tables import GT, md, html, system_fonts +import pandas as pd + +power_cie_prepared_tbl = pd.read_csv("../examples/_data/power_cie_prepared_tbl.csv") + + +# Create a Great Tables object +ciep_gt_tbl = GT(data=power_cie_prepared_tbl) + +# Apply wider color ranges & formatting + +print( + ciep_gt_tbl.fmt_percent( + columns=[ + "Hydro", + "Nuclear", + "Wind", + "Solar", + "Geothermal", + "Biomass", + "Gas", + "Coal", + "Oil", + "Unknown", + "Hydro Discharge", + "Battery Discharge", + ], + decimals=1, + ) + .fmt_number(columns=["CO2 Intensity"], decimals=0) + .data_color( + columns=["CO2 Intensity"], + palette=[ + "#00A600", + "#E6E600", + "#E8C32E", + "#D69C4E", + "#Dc863B", + "sienna", + "sienna4", + "tomato4", + "brown", + ], + domain=[0, 900], + ) + .data_color( + columns=["Hydro", "Nuclear", "Wind", "Solar", "Geothermal"], + palette=["#00A600", "chartreuse3", "chartreuse4", "snow"][::-1], + domain=[0, 1], + ) + .data_color( + columns=["Hydro", "Geothermal"], + palette=["#00A600", "chartreuse3", "chartreuse4", "snow"][::-1], + domain=[0, 1], + ) + .data_color( + columns=["Biomass"], palette=["snow", "#EEC900", "#E8C32E", "#D69C4E"], domain=[0, 0.3] + ) + .data_color( + columns=["Gas", "Coal", "Oil"], + palette=["tomato4", "sienna4", "#D69C4E", "#Dc863B", "snow"][::-1], + domain=[0, 1], + ) + .data_color( + columns=["Zone", "Unknown", "Hydro Discharge", "Battery Discharge"], + palette=["snow", "snow", "snow", "snow"], + ) + .cols_width( + { + "CO2 Intensity": "58px", + "Hydro": "58px", + "Nuclear": "58px", + "Wind": "58px", + "Solar": "58px", + "Geothermal": "58px", + "Biomass": "58px", + "Gas": "58px", + "Coal": "58px", + "Oil": "58px", + "Unknown": "58px", + "Hydro Discharge": "58px", + "Battery Discharge": "58px", + } + ) + .tab_header( + # title=md( + # "2023 Mean **Carbon Intensity** (gCO2eq/kWh) and **Power Consumption** Breakdown (%)" + # ) + title="2023 Mean **Carbon Intensity** (gCO2eq/kWh) and **Power Consumption** Breakdown (%)" + ) + # .tab_source_note( + # md( + # '
' + # "**Source**: api.electricitymap.org" + # " | **Methodology**: https://www.electricitymaps.com/methodology." + # " Some emissions factors are based on IPCC 2014 defaults, while some are based on more #accurate regional factors." + # "
All zones are publicly available on the *Carbon intensity and emission factors* tab via Google docs link
" + # "
" + # "
" + # ) + # ) + .tab_options( + source_notes_font_size="x-small", + source_notes_padding=3, + table_font_names=system_fonts("humanist"), + data_row_padding="1px", + heading_background_color="antiquewhite", + source_notes_background_color="antiquewhite", + column_labels_background_color="antiquewhite", + table_background_color="snow", + data_row_padding_horizontal=3, + column_labels_padding_horizontal=58, + ) + .cols_align(align="center") + .cols_align(align="left", columns=["Zone"]) + .opt_table_outline() + .as_latex() +) +``` + + + + +```{python} +import polars as pl +import polars.selectors as cs +from great_tables import GT, loc, style + +coffee_sales = pl.DataFrame.deserialize("../examples/_data/coffee-sales.json", format="json") + +sel_rev = cs.starts_with("revenue") +sel_prof = cs.starts_with("profit") + +# yo + +print( + GT(coffee_sales) + .tab_header("Sales of Coffee Equipment") + .tab_spanner(label="Revenue", columns=sel_rev) + .tab_spanner(label="Profit", columns=sel_prof) + .cols_label( + revenue_dollars="Amount", + profit_dollars="Amount", + revenue_pct="Percent", + profit_pct="Percent", + monthly_sales="Monthly Sales", + icon="", + product="Product", + ) + # formatting ---- + .fmt_number( + columns=cs.ends_with("dollars"), + compact=True, + pattern="${x}", + n_sigfig=3, + ) + .fmt_percent(columns=cs.ends_with("pct"), decimals=0) + # style ---- + .tab_style( + style=style.fill(color="aliceblue"), + locations=loc.body(columns=sel_rev), + ) + .tab_style( + style=style.fill(color="papayawhip"), + locations=loc.body(columns=sel_prof), + ) + .tab_style( + style=style.text(weight="bold"), + locations=loc.body(rows=pl.col("product") == "Total"), + ) + # .fmt_nanoplot("monthly_sales", plot_type="bar") + #.fmt_image("icon", path="docs/examples/_data/coffee-table-icons/") + .sub_missing(missing_text="") + .as_latex() +) +``` diff --git a/great_tables/_boxhead.py b/great_tables/_boxhead.py index 32b1b827b..b204d1ce8 100644 --- a/great_tables/_boxhead.py +++ b/great_tables/_boxhead.py @@ -5,14 +5,14 @@ from ._locations import resolve_cols_c from ._utils import _assert_list_is_subset from ._tbl_data import SelectExpr -from ._text import Text +from ._text import BaseText if TYPE_CHECKING: from ._types import GTSelf def cols_label( - self: GTSelf, cases: dict[str, str | Text] | None = None, **kwargs: str | Text + self: GTSelf, cases: dict[str, str | BaseText] | None = None, **kwargs: str | BaseText ) -> GTSelf: """ Relabel one or more columns. @@ -132,7 +132,7 @@ def cols_label( _assert_list_is_subset(mod_columns, set_list=column_names) # Handle units syntax in labels (e.g., "Density ({{ppl / mi^2}})") - new_kwargs: dict[str, UnitStr | str | Text] = {} + new_kwargs: dict[str, UnitStr | str | BaseText] = {} for k, v in new_cases.items(): @@ -145,12 +145,12 @@ def cols_label( else: new_kwargs[k] = unitstr_v - elif isinstance(v, Text): + elif isinstance(v, BaseText): new_kwargs[k] = v else: raise ValueError( - "Column labels must be strings or Text objects. Use `md()` or `html()` for formatting." + "Column labels must be strings or BaseText objects. Use `md()` or `html()` for formatting." 
) boxhead = self._boxhead._set_column_labels(new_kwargs) diff --git a/great_tables/_export.py b/great_tables/_export.py index 0e033dd03..29fc31577 100644 --- a/great_tables/_export.py +++ b/great_tables/_export.py @@ -12,6 +12,8 @@ from typing_extensions import TypeAlias from ._utils import _try_import +from ._utils_render_latex import _render_as_latex + if TYPE_CHECKING: # Note that as_raw_html uses methods on the GT class, not just data @@ -166,6 +168,98 @@ def as_raw_html( return html_table +def as_latex(self: GT, use_longtable: bool = False, tbl_pos: str | None = None) -> str: + """ + Output a GT object as LaTeX + + The `as_latex()` method outputs a GT object as a LaTeX fragment. This method is useful for when + you need to include a table as part of a LaTeX document. The LaTeX fragment contains the table + as a string. + + :::{.callout-warning} + `as_latex()` is still experimental. + ::: + + Parameters + ---------- + + use_longtable + An option to use the `longtable` environment in LaTeX output. This is useful for tables that + span multiple pages and don't require precise positioning. + tbl_pos + The position of the table in the LaTeX output when `use_longtable=False`. Valid values for + positioning include `"!t"` (top of page), `"!b"` (bottom of the page), `"!h"` (here), + `"!p"` (on a separate page), and `"!H"` (exactly here). If a value is not provided then the + table will be placed at the top of the page; if in the Quarto render then the table + positioning option will be ignored in favor of any setting within the Quarto rendering + environment. + + Returns + ------- + str + A LaTeX fragment that contains the table. + + Limitations + ----------- + The `as_latex()` method is still experimental and has some limitations. The following + functionality that is supported in HTML output tables is not currently supported in LaTeX + output tables: + + - the rendering of the stub and row group labels (via the `=rowname_col` and `=groupname_col` + args in the `GT()` class) + - the use of the `md()` helper function to signal conversion of Markdown text + - units notation within the `cols_labels()` and `tab_spanner()` methods + - the `fmt_markdown()`, `fmt_units()`, `fmt_image()`, and `fmt_nanoplot()` methods + - the `sub_missing()` and `sub_zero()` methods + - most options in the `tab_options()` method, particularly those that are specific to styling + text, borders, or adding fill colors to cells + + As development continues, we will work to expand the capabilities of the `as_latex()` method to + reduce these limitations and more clearly document what is and is not supported. + + Examples + -------- + Let's use a subset of the `gtcars` dataset to create a new table. + + ```{python} + from great_tables import GT + from great_tables.data import gtcars + import polars as pl + + gtcars_mini = ( + pl.from_pandas(gtcars) + .select(["mfr", "model", "msrp"]) + .head(5) + ) + + gt_tbl = ( + GT(gtcars_mini) + .tab_header( + title="Data Listing from the gtcars Dataset", + subtitle="Only five rows from the dataset are shown here." + ) + .fmt_currency(columns="msrp") + ) + + gt_tbl + ``` + + Now we can return the table as string of LaTeX code using the `as_latex()` method. + + ```{python} + gt_tbl.as_latex() + ``` + + The LaTeX string contains the code just for the table (it's not a complete LaTeX document). + This output can be useful for embedding a GT table in an existing LaTeX document. 
+ """ + built_table = self._build_data(context="latex") + + latex_table = _render_as_latex(data=built_table, use_longtable=use_longtable, tbl_pos=tbl_pos) + + return latex_table + + # Create a list of all selenium webdrivers WebDrivers: TypeAlias = Literal[ "chrome", diff --git a/great_tables/_formats.py b/great_tables/_formats.py index d061162d4..cc8cc63d9 100644 --- a/great_tables/_formats.py +++ b/great_tables/_formats.py @@ -5,6 +5,7 @@ from dataclasses import dataclass from datetime import date, datetime, time from decimal import Decimal +from functools import partial from pathlib import Path from typing import TYPE_CHECKING, Any, Callable, ClassVar, Literal, TypedDict, TypeVar, cast @@ -26,7 +27,7 @@ to_list, _get_column_dtype, ) -from ._text import _md_html +from ._text import _md_html, escape_pattern_str_latex from ._utils import _str_detect, _str_replace from ._utils_nanoplots import _generate_nanoplot @@ -285,66 +286,90 @@ def fmt_number( sep_mark = _get_locale_sep_mark(default=sep_mark, use_seps=use_seps, locale=locale) dec_mark = _get_locale_dec_mark(default=dec_mark, locale=locale) - # Generate a function that will operate on single `x` values in the table body - def fmt_number_fn( - x: float | None, - decimals: int = decimals, - n_sigfig: int | None = n_sigfig, - drop_trailing_zeros: bool = drop_trailing_zeros, - drop_trailing_dec_mark: bool = drop_trailing_dec_mark, - use_seps: bool = use_seps, - scale_by: float = scale_by, - compact: bool = compact, - sep_mark: str = sep_mark, - dec_mark: str = dec_mark, - force_sign: bool = force_sign, - ): - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_number_context, + data=self, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + scale_by=scale_by, + compact=compact, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + pattern=pattern, + ) - # Scale `x` value by a defined `scale_by` value - x = x * scale_by - - # Determine whether the value is positive - is_negative = _has_negative_value(value=x) - - if compact: - x_formatted = _format_number_compactly( - value=x, - decimals=decimals, - n_sigfig=n_sigfig, - drop_trailing_zeros=drop_trailing_zeros, - drop_trailing_dec_mark=drop_trailing_dec_mark, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark=dec_mark, - force_sign=force_sign, - ) - else: - x_formatted = _value_to_decimal_notation( - value=x, - decimals=decimals, - n_sigfig=n_sigfig, - drop_trailing_zeros=drop_trailing_zeros, - drop_trailing_dec_mark=drop_trailing_dec_mark, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark=dec_mark, - force_sign=force_sign, - ) - - # Implement minus sign replacement for `x_formatted` - if is_negative: - minus_mark = _context_minus_mark() - x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) - return x_formatted +def fmt_number_context( + x: float | None, + data: GTData, + decimals: int, + n_sigfig: int | None, + drop_trailing_zeros: bool, + drop_trailing_dec_mark: bool, + use_seps: bool, + scale_by: float, + compact: bool, + sep_mark: str, + dec_mark: str, + force_sign: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x + + # Scale `x` value by a defined `scale_by` value + x = x 
* scale_by + + # Determine whether the value is positive + is_negative = _has_negative_value(value=x) + + if compact: + x_formatted = _format_number_compactly( + value=x, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + ) + else: + x_formatted = _value_to_decimal_notation( + value=x, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + ) + + # Implement minus sign replacement for `x_formatted` + if is_negative: + minus_mark = _context_minus_mark(context=context) + x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": - return fmt(self, fns=fmt_number_fn, columns=columns, rows=rows) + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_integer( @@ -458,59 +483,81 @@ def fmt_integer( # Use locale-based marks if a locale ID is provided sep_mark = _get_locale_sep_mark(default=sep_mark, use_seps=use_seps, locale=locale) - # Generate a function that will operate on single `x` values in - # the table body - def fmt_integer_fn( - x: float, - scale_by: float = scale_by, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_integer_context, + data=self, + use_seps=use_seps, + scale_by=scale_by, + compact=compact, + sep_mark=sep_mark, + force_sign=force_sign, + pattern=pattern, + ) - # Scale `x` value by a defined `scale_by` value - x = x * scale_by - - # Determine whether the value is positive - is_negative = _has_negative_value(value=x) - - if compact: - x_formatted = _format_number_compactly( - value=x, - decimals=0, - n_sigfig=None, - drop_trailing_zeros=False, - drop_trailing_dec_mark=True, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark="not used", - force_sign=force_sign, - ) - else: - x_formatted = _value_to_decimal_notation( - value=x, - decimals=0, - n_sigfig=None, - drop_trailing_zeros=False, - drop_trailing_dec_mark=True, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark="not used", - force_sign=force_sign, - ) - - # Implement minus sign replacement for `x_formatted` - if is_negative: - minus_mark = _context_minus_mark() - x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) - return x_formatted +def fmt_integer_context( + x: float | None, + data: GTData, + use_seps: bool, + scale_by: float, + compact: bool, + sep_mark: str, + force_sign: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x + + # Scale `x` value by a defined `scale_by` value + x = x * scale_by + + # Determine whether the value is positive + is_negative = _has_negative_value(value=x) + + if compact: + x_formatted = _format_number_compactly( + value=x, + decimals=0, + n_sigfig=None, + drop_trailing_zeros=False, + 
drop_trailing_dec_mark=True, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark="not used", + force_sign=force_sign, + ) + + else: + x_formatted = _value_to_decimal_notation( + value=x, + decimals=0, + n_sigfig=None, + drop_trailing_zeros=False, + drop_trailing_dec_mark=True, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark="not used", + force_sign=force_sign, + ) - return fmt(self, fns=fmt_integer_fn, columns=columns, rows=rows) + # Implement minus sign replacement for `x_formatted` + if is_negative: + minus_mark = _context_minus_mark(context=context) + x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_scientific( @@ -663,112 +710,136 @@ def fmt_scientific( sep_mark = _get_locale_sep_mark(default=sep_mark, use_seps=use_seps, locale=locale) dec_mark = _get_locale_dec_mark(default=dec_mark, locale=locale) - # Generate a function that will operate on single `x` values in the table body - def fmt_scientific_fn( - x: float, - decimals: int = decimals, - n_sigfig: int | None = n_sigfig, - drop_trailing_zeros: bool = drop_trailing_zeros, - drop_trailing_dec_mark: bool = drop_trailing_dec_mark, - scale_by: float = scale_by, - exp_style: str = exp_style, - sep_mark: str = sep_mark, - dec_mark: str = dec_mark, - force_sign_m: bool = force_sign_m, - force_sign_n: bool = force_sign_n, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_scientific_context, + data=self, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + scale_by=scale_by, + exp_style=exp_style, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign_m=force_sign_m, + force_sign_n=force_sign_n, + pattern=pattern, + ) - # Scale `x` value by a defined `scale_by` value - x = x * scale_by + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Determine whether the value is positive - is_positive = _has_positive_value(value=x) - minus_mark = _context_minus_mark() +# Generate a function that will operate on single `x` values in the table body +def fmt_scientific_context( + x: float | None, + data: GTData, + decimals: int, + n_sigfig: int | None, + drop_trailing_zeros: bool, + drop_trailing_dec_mark: bool, + scale_by: float, + exp_style: str, + sep_mark: str, + dec_mark: str, + force_sign_m: bool, + force_sign_n: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - x_sci_notn = _value_to_scientific_notation( - value=x, - decimals=decimals, - n_sigfig=n_sigfig, - dec_mark=dec_mark, - ) + # Scale `x` value by a defined `scale_by` value + x = x * scale_by - sci_parts = x_sci_notn.split("E") + # Determine whether the value is positive + is_positive = _has_positive_value(value=x) - m_part, n_part = sci_parts + minus_mark = _context_minus_mark(context=context) - # Remove trailing zeros and decimal marks from the `m_part` - if drop_trailing_zeros: - m_part = m_part.rstrip("0") - if drop_trailing_dec_mark: - m_part = m_part.rstrip(".") + x_sci_notn = _value_to_scientific_notation( + value=x, + decimals=decimals, + n_sigfig=n_sigfig, + dec_mark=dec_mark, + ) - # Force 
the positive sign to be present if the `force_sign_m` option is taken - if is_positive and force_sign_m: - m_part = "+" + m_part + sci_parts = x_sci_notn.split("E") - if exp_style == "x10n": - # Define the exponent string based on the `exp_style` that is the default - # ('x10n'); this is styled as 'x 10^n' instead of using a fixed symbol like 'E' + m_part, n_part = sci_parts - # Determine which values don't require the (x 10^n) for scientific formatting - # since their order would be zero - small_pos = _has_sci_order_zero(value=x) + # Remove trailing zeros and decimal marks from the `m_part` + if drop_trailing_zeros: + m_part = m_part.rstrip("0") + if drop_trailing_dec_mark: + m_part = m_part.rstrip(".") - # Force the positive sign to be present if the `force_sign_n` option is taken - if force_sign_n and not _str_detect(n_part, "-"): - n_part = "+" + n_part + # Force the positive sign to be present if the `force_sign_m` option is taken + if is_positive and force_sign_m: + m_part = "+" + m_part - # Implement minus sign replacement for `m_part` and `n_part` - m_part = _replace_minus(m_part, minus_mark=minus_mark) - n_part = _replace_minus(n_part, minus_mark=minus_mark) + if exp_style == "x10n": + # Define the exponent string based on the `exp_style` that is the default + # ('x10n'); this is styled as 'x 10^n' instead of using a fixed symbol like 'E' - if small_pos: - # If the value is small enough to not require the (x 10^n) notation, then - # the formatted value is based on only the `m_part` - x_formatted = m_part - else: - # Get the set of exponent marks, which are used to decorate the `n_part` - exp_marks = _context_exp_marks() + # Determine which values don't require the (x 10^n) for scientific formatting + # since their order would be zero + small_pos = _has_sci_order_zero(value=x) + + # Force the positive sign to be present if the `force_sign_n` option is taken + if force_sign_n and not _str_detect(n_part, "-"): + n_part = "+" + n_part - # Create the formatted string based on `exp_marks` and the two `sci_parts` - x_formatted = m_part + exp_marks[0] + n_part + exp_marks[1] + # Implement minus sign replacement for `m_part` and `n_part` + m_part = _replace_minus(m_part, minus_mark=minus_mark) + n_part = _replace_minus(n_part, minus_mark=minus_mark) + if small_pos: + # If the value is small enough to not require the (x 10^n) notation, then + # the formatted value is based on only the `m_part` + x_formatted = m_part else: - # Define the exponent string based on the `exp_style` that's not the default - # value of 'x10n' + # Get the set of exponent marks, which are used to decorate the `n_part` + exp_marks = _context_exp_marks(context=context) - exp_str = _context_exp_str(exp_style=exp_style) + # Create the formatted string based on `exp_marks` and the two `sci_parts` + x_formatted = m_part + exp_marks[0] + n_part + exp_marks[1] - n_min_width = 1 if _str_detect(exp_style, r"^[a-zA-Z]1$") else 2 + else: + # Define the exponent string based on the `exp_style` that's not the default + # value of 'x10n' - # The `n_part` will be extracted here and it must be padded to - # the defined minimum number of decimal places - if _str_detect(n_part, "-"): - n_part = _str_replace(n_part, "-", "") - n_part = n_part.ljust(n_min_width, "0") - n_part = "-" + n_part - else: - n_part = n_part.ljust(n_min_width, "0") - if force_sign_n: - n_part = "+" + n_part + exp_str = _context_exp_str(exp_style=exp_style) + + n_min_width = 1 if _str_detect(exp_style, r"^[a-zA-Z]1$") else 2 - # Implement minus sign replacement 
for `m_part` and `n_part` - m_part = _replace_minus(m_part, minus_mark=minus_mark) - n_part = _replace_minus(n_part, minus_mark=minus_mark) + # The `n_part` will be extracted here and it must be padded to + # the defined minimum number of decimal places + if _str_detect(n_part, "-"): + n_part = _str_replace(n_part, "-", "") + n_part = n_part.ljust(n_min_width, "0") + n_part = "-" + n_part + else: + n_part = n_part.ljust(n_min_width, "0") + if force_sign_n: + n_part = "+" + n_part - x_formatted = m_part + exp_str + n_part + # Implement minus sign replacement for `m_part` and `n_part` + m_part = _replace_minus(m_part, minus_mark=minus_mark) + n_part = _replace_minus(n_part, minus_mark=minus_mark) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + x_formatted = m_part + exp_str + n_part - return x_formatted + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) - return fmt(self, fns=fmt_scientific_fn, columns=columns, rows=rows) + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_percent( @@ -919,72 +990,100 @@ def fmt_percent( else: scale_by = 1.0 - # Generate a function that will operate on single `x` values in the table body - def fmt_percent_fn( - x: float, - decimals: int = decimals, - drop_trailing_zeros: bool = drop_trailing_zeros, - drop_trailing_dec_mark: bool = drop_trailing_dec_mark, - use_seps: bool = use_seps, - scale_by: float = scale_by, - sep_mark: str = sep_mark, - dec_mark: str = dec_mark, - force_sign: bool = force_sign, - placement: str = placement, - incl_space: bool = incl_space, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_percent_context, + data=self, + decimals=decimals, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + scale_by=scale_by, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + placement=placement, + incl_space=incl_space, + pattern=pattern, + ) - # Scale `x` value by a defined `scale_by` value - x = x * scale_by + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Determine properties of the value - is_negative = _has_negative_value(value=x) - is_positive = _has_positive_value(value=x) - x_formatted = _value_to_decimal_notation( - value=x, - decimals=decimals, - n_sigfig=None, - drop_trailing_zeros=drop_trailing_zeros, - drop_trailing_dec_mark=drop_trailing_dec_mark, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark=dec_mark, - force_sign=force_sign, - ) +def fmt_percent_context( + x: float | None, + data: GTData, + decimals: int, + drop_trailing_zeros: bool, + drop_trailing_dec_mark: bool, + use_seps: bool, + scale_by: float, + sep_mark: str, + dec_mark: str, + force_sign: bool, + placement: str, + incl_space: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - # Create a percent pattern for affixing the percent sign - space_character = " " if incl_space else "" - percent_pattern = ( - f"{{x}}{space_character}%" if placement == "right" else f"%{space_character}{{x}}" - ) + # Scale `x` value by a defined `scale_by` value + x = x * scale_by - if is_negative and placement == "left": - 
x_formatted = x_formatted.replace("-", "") - x_formatted = percent_pattern.replace("{x}", x_formatted) - x_formatted = "-" + x_formatted - elif is_positive and force_sign and placement == "left": - x_formatted = x_formatted.replace("+", "") - x_formatted = percent_pattern.replace("{x}", x_formatted) - x_formatted = "+" + x_formatted - else: - x_formatted = percent_pattern.replace("{x}", x_formatted) + # Determine properties of the value + is_negative = _has_negative_value(value=x) + is_positive = _has_positive_value(value=x) - # Implement minus sign replacement for `x_formatted` - if is_negative: - minus_mark = _context_minus_mark() - x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + x_formatted = _value_to_decimal_notation( + value=x, + decimals=decimals, + n_sigfig=None, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + ) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + # Get the context-specific percent mark + percent_mark = _context_percent_mark(context=context) - return x_formatted + # Create a percent pattern for affixing the percent sign + space_character = " " if incl_space else "" + percent_pattern = ( + f"{{x}}{space_character}{percent_mark}" + if placement == "right" + else f"{percent_mark}{space_character}{{x}}" + ) + + if is_negative and placement == "left": + x_formatted = x_formatted.replace("-", "") + x_formatted = percent_pattern.replace("{x}", x_formatted) + x_formatted = "-" + x_formatted + elif is_positive and force_sign and placement == "left": + x_formatted = x_formatted.replace("+", "") + x_formatted = percent_pattern.replace("{x}", x_formatted) + x_formatted = "+" + x_formatted + else: + x_formatted = percent_pattern.replace("{x}", x_formatted) + + # Implement minus sign replacement for `x_formatted` + if is_negative: + minus_mark = _context_minus_mark(context="html") + x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": - return fmt(self, fns=fmt_percent_fn, columns=columns, rows=rows) + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_currency( @@ -1151,79 +1250,105 @@ def fmt_currency( currency=currency_resolved, decimals=decimals, use_subunits=use_subunits ) - # Generate a function that will operate on single `x` values in the table body - def fmt_currency_fn( - x: float, - currency: str = currency_resolved, - decimals: int = decimals, - drop_trailing_dec_mark: bool = drop_trailing_dec_mark, - use_seps: bool = use_seps, - scale_by: float = scale_by, - sep_mark: str = sep_mark, - dec_mark: str = dec_mark, - force_sign: bool = force_sign, - placement: str = placement, - incl_space: bool = incl_space, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_currency_context, + data=self, + currency=currency_resolved, + decimals=decimals, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + scale_by=scale_by, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + placement=placement, + incl_space=incl_space, + 
pattern=pattern, + ) - # Scale `x` value by a defined `scale_by` value - x = x * scale_by + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Determine properties of the value - is_negative = _has_negative_value(value=x) - is_positive = _has_positive_value(value=x) - # Get the currency symbol on the basis of a valid currency code - currency_symbol = _get_currency_str(currency=currency) +def fmt_currency_context( + x: float | None, + data: GTData, + currency: str, + decimals: int, + drop_trailing_dec_mark: bool, + use_seps: bool, + scale_by: float, + sep_mark: str, + dec_mark: str, + force_sign: bool, + placement: str, + incl_space: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - # Format the value to decimal notation; this is done before the currency symbol is - # affixed to the value - x_formatted = _value_to_decimal_notation( - value=x, - decimals=decimals, - n_sigfig=None, - drop_trailing_zeros=False, - drop_trailing_dec_mark=drop_trailing_dec_mark, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark=dec_mark, - force_sign=force_sign, - ) + # Scale `x` value by a defined `scale_by` value + x = x * scale_by - # Create a currency pattern for affixing the currency symbol - space_character = " " if incl_space else "" - currency_pattern = ( - f"{{x}}{space_character}{currency_symbol}" - if placement == "right" - else f"{currency_symbol}{space_character}{{x}}" - ) + # Determine properties of the value + is_negative = _has_negative_value(value=x) + is_positive = _has_positive_value(value=x) - if is_negative and placement == "left": - x_formatted = x_formatted.replace("-", "") - x_formatted = currency_pattern.replace("{x}", x_formatted) - x_formatted = "-" + x_formatted - elif is_positive and force_sign and placement == "left": - x_formatted = x_formatted.replace("+", "") - x_formatted = currency_pattern.replace("{x}", x_formatted) - x_formatted = "+" + x_formatted - else: - x_formatted = currency_pattern.replace("{x}", x_formatted) + # Get the currency symbol on the basis of a valid currency code + currency_symbol = _get_currency_str(currency=currency) - # Implement minus sign replacement for `x_formatted` - if is_negative: - minus_mark = _context_minus_mark() - x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + if currency_symbol == "$": + currency_symbol = _context_dollar_mark(context=context) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + # Format the value to decimal notation; this is done before the currency symbol is + # affixed to the value + x_formatted = _value_to_decimal_notation( + value=x, + decimals=decimals, + n_sigfig=None, + drop_trailing_zeros=False, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + ) - return x_formatted + # Create a currency pattern for affixing the currency symbol + space_character = " " if incl_space else "" + currency_pattern = ( + f"{{x}}{space_character}{currency_symbol}" + if placement == "right" + else f"{currency_symbol}{space_character}{{x}}" + ) - return fmt(self, fns=fmt_currency_fn, columns=columns, rows=rows) + if is_negative and placement == "left": + x_formatted = x_formatted.replace("-", "") + x_formatted = currency_pattern.replace("{x}", x_formatted) + x_formatted = "-" + x_formatted + elif is_positive and force_sign and placement == "left": + x_formatted = 
x_formatted.replace("+", "") + x_formatted = currency_pattern.replace("{x}", x_formatted) + x_formatted = "+" + x_formatted + else: + x_formatted = currency_pattern.replace("{x}", x_formatted) + + # Implement minus sign replacement for `x_formatted` + if is_negative: + minus_mark = _context_minus_mark(context=context) + x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_roman( @@ -1290,50 +1415,64 @@ def fmt_roman( # Check that the `case` value is valid and only consists of the string 'upper' or 'lower' _validate_case(case=case) - # Generate a function that will operate on single `x` values in the table body - def fmt_roman_fn( - x: float, - case: str = case, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_roman_context, + data=self, + case=case, + pattern=pattern, + ) - # Get the absolute value of `x` so that negative values are handled - x = abs(x) + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Round x to 0 digits with the R-H-U method of rounding (for reproducibility purposes) - x = _round_rhu(x, 0) - # Determine if `x` is in the range of 1 to 3899 and if it is zero - x_is_in_range = x > 0 and x < 3900 - x_is_zero = x == 0 +def fmt_roman_context( + x: float, + data: GTData, + case: str, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x + + # Get the absolute value of `x` so that negative values are handled + x = abs(x) + + # Round x to 0 digits with the R-H-U method of rounding (for reproducibility purposes) + x = _round_rhu(x, 0) + + # Determine if `x` is in the range of 1 to 3899 and if it is zero + x_is_in_range = x > 0 and x < 3900 + x_is_zero = x == 0 + + if not x_is_in_range and not x_is_zero: + # We cannot format a 'large' integer to roman numerals, so we return a string + # that indicates this + return "ex terminis" + elif x_is_zero: + # Zero is a special case and is handled separately with the character 'N' + # which stands for 'nulla' (i.e., 'nothing') + x_formatted = "N" + else: + # All other values are formatted with the `_as_roman()` utility function + x_formatted = _as_roman(x) - if not x_is_in_range and not x_is_zero: - # We cannot format a 'large' integer to roman numerals, so we return a string - # that indicates this - return "ex terminis" - elif x_is_zero: - # Zero is a special case and is handled separately with the character 'N' - # which stands for 'nulla' (i.e., 'nothing') - x_formatted = "N" - else: - # All other values are formatted with the `_as_roman()` utility function - x_formatted = _as_roman(x) + # Transform the case of the formatted value + if case == "upper": + pass + else: + x_formatted = x_formatted.lower() - # Transform the case of the formatted value - if case == "upper": - pass - else: - x_formatted = x_formatted.lower() + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + # Escape LaTeX special characters from literals in the pattern + if context == 
"latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) - return x_formatted + x_formatted = pattern.replace("{x}", x_formatted) - return fmt(self, fns=fmt_roman_fn, columns=columns, rows=rows) + return x_formatted def fmt_bytes( @@ -1486,88 +1625,112 @@ def fmt_bytes( base = 1024 byte_units = ["B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB"] - # Generate a function that will operate on single `x` values in the table body - def fmt_bytes_fn( - x: float, - base: int = base, - byte_units: list[str] = byte_units, - decimals: int = decimals, - n_sigfig: int | None = n_sigfig, - drop_trailing_zeros: bool = drop_trailing_zeros, - drop_trailing_dec_mark: bool = drop_trailing_dec_mark, - use_seps: bool = use_seps, - sep_mark: str = sep_mark, - dec_mark: str = dec_mark, - force_sign: bool = force_sign, - incl_space: bool = incl_space, - ): - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_bytes_context, + data=self, + base=base, + byte_units=byte_units, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + incl_space=incl_space, + pattern=pattern, + ) - # Truncate all byte values by casting to an integer; this is done because bytes - # are always whole numbers - x = int(x) + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Determine properties of the value - is_negative = _has_negative_value(value=x) - # Determine the power index for the value - if x == 0: - # If the value is zero, then the power index is 1; otherwise, we'd get - # an error when trying to calculate the log of zero - num_power_idx = 1 - else: - # Otherwise, we can calculate the power index by taking the log of the value - # and dividing by the log of the base; we add 1 to the result to account for - # the fact that the power index is 1-based (i.e., the first element in the - # `byte_units` list is at index 0) --- the final statement ensures that the - # power index is always at least 1 - num_power_idx = math.floor(math.log(abs(x), base)) + 1 - num_power_idx = max(1, min(len(byte_units), num_power_idx)) - - # The `units_str` is obtained by indexing the `byte_units` list with the `num_power_idx` - # value; this is the string that will be affixed to the formatted value - units_str = byte_units[num_power_idx - 1] - - # Scale `x` value by a defined `base` value, this is done by dividing by the - # `base` value raised to the power index minus 1 (we subtract 1 because the - # power index is 1-based) - x = x / base ** (num_power_idx - 1) - - # Format the value to decimal notation; this is done before the `byte_units` text - # is affixed to the value - x_formatted = _value_to_decimal_notation( - value=x, - decimals=decimals, - n_sigfig=n_sigfig, - drop_trailing_zeros=drop_trailing_zeros, - drop_trailing_dec_mark=drop_trailing_dec_mark, - use_seps=use_seps, - sep_mark=sep_mark, - dec_mark=dec_mark, - force_sign=force_sign, - ) +def fmt_bytes_context( + x: float, + data: GTData, + base: int, + byte_units: list[str], + decimals: int, + n_sigfig: int | None, + drop_trailing_zeros: bool, + drop_trailing_dec_mark: bool, + use_seps: bool, + sep_mark: str, + dec_mark: str, + force_sign: bool, + incl_space: bool, + pattern: str, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - # Create a `bytes_pattern` object for affixing 
the `units_str`, which is the - # string that represents the byte units - space_character = " " if incl_space else "" - bytes_pattern = f"{{x}}{space_character}{units_str}" + # Truncate all byte values by casting to an integer; this is done because bytes + # are always whole numbers + x = int(x) - x_formatted = bytes_pattern.replace("{x}", x_formatted) + # Determine properties of the value + is_negative = _has_negative_value(value=x) - # Implement minus sign replacement for `x_formatted` - if is_negative: - minus_mark = _context_minus_mark() - x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + # Determine the power index for the value + if x == 0: + # If the value is zero, then the power index is 1; otherwise, we'd get + # an error when trying to calculate the log of zero + num_power_idx = 1 + else: + # Otherwise, we can calculate the power index by taking the log of the value + # and dividing by the log of the base; we add 1 to the result to account for + # the fact that the power index is 1-based (i.e., the first element in the + # `byte_units` list is at index 0) --- the final statement ensures that the + # power index is always at least 1 + num_power_idx = math.floor(math.log(abs(x), base)) + 1 + num_power_idx = max(1, min(len(byte_units), num_power_idx)) + + # The `units_str` is obtained by indexing the `byte_units` list with the `num_power_idx` + # value; this is the string that will be affixed to the formatted value + units_str = byte_units[num_power_idx - 1] + + # Scale `x` value by a defined `base` value, this is done by dividing by the + # `base` value raised to the power index minus 1 (we subtract 1 because the + # power index is 1-based) + x = x / base ** (num_power_idx - 1) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + # Format the value to decimal notation; this is done before the `byte_units` text + # is affixed to the value + x_formatted = _value_to_decimal_notation( + value=x, + decimals=decimals, + n_sigfig=n_sigfig, + drop_trailing_zeros=drop_trailing_zeros, + drop_trailing_dec_mark=drop_trailing_dec_mark, + use_seps=use_seps, + sep_mark=sep_mark, + dec_mark=dec_mark, + force_sign=force_sign, + ) - return x_formatted + # Create a `bytes_pattern` object for affixing the `units_str`, which is the + # string that represents the byte units + space_character = " " if incl_space else "" + bytes_pattern = f"{{x}}{space_character}{units_str}" + + x_formatted = bytes_pattern.replace("{x}", x_formatted) - return fmt(self, fns=fmt_bytes_fn, columns=columns, rows=rows) + # Implement minus sign replacement for `x_formatted` + if is_negative: + minus_mark = _context_minus_mark(context="html") + x_formatted = _replace_minus(x_formatted, minus_mark=minus_mark) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_date( @@ -1677,40 +1840,57 @@ def fmt_date( # Get the date format string based on the `date_style` value date_format_str = _get_date_format(date_style=date_style) - # Generate a function that will operate on single `x` values in the table body - def fmt_date_fn( - x: Any, date_format_str: str = date_format_str, locale: str | None = locale - ) -> str: - # If the `x` value is a Pandas 
'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_date_context, + data=self, + date_format_str=date_format_str, + pattern=pattern, + locale=locale, + ) - # If `x` is a string, we assume it is an ISO date string and convert it to a date object - if isinstance(x, str): + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Convert the ISO date string to a date object - x = _iso_str_to_date(x) - else: - # Stop if `x` is not a valid date object - _validate_date_obj(x=x) +def fmt_date_context( + x: Any, + data: GTData, + date_format_str: str, + pattern: str, + locale: str | None, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - # Fix up the locale for `format_date()` by replacing any hyphens with underscores - if locale is None: - locale = "en_US" - else: - locale = _str_replace(locale, "-", "_") + # If `x` is a string, we assume it is an ISO date string and convert it to a date object + if isinstance(x, str): - # Format the date object to a string using Babel's `format_date()` function - x_formatted = format_date(x, format=date_format_str, locale=locale) + # Convert the ISO date string to a date object + x = _iso_str_to_date(x) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + else: + # Stop if `x` is not a valid date object + _validate_date_obj(x=x) - return x_formatted + # Fix up the locale for `format_date()` by replacing any hyphens with underscores + if locale is None: + locale = "en_US" + else: + locale = _str_replace(locale, "-", "_") + + # Format the date object to a string using Babel's `format_date()` function + x_formatted = format_date(x, format=date_format_str, locale=locale) - return fmt(self, fns=fmt_date_fn, columns=columns, rows=rows) + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_time( @@ -1808,40 +1988,57 @@ def fmt_time( # Get the time format string based on the `time_style` value time_format_str = _get_time_format(time_style=time_style) - # Generate a function that will operate on single `x` values in the table body - def fmt_time_fn( - x: Any, time_format_str: str = time_format_str, locale: str | None = locale - ) -> str: - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_time_context, + data=self, + time_format_str=time_format_str, + pattern=pattern, + locale=locale, + ) - # If `x` is a string, assume it is an ISO time string and convert it to a time object - if isinstance(x, str): + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # Convert the ISO time string to a time object - x = _iso_str_to_time(x) - else: - # Stop if `x` is not a valid time object - _validate_time_obj(x=x) +def fmt_time_context( + x: Any, + data: GTData, + time_format_str: str, + pattern: str, + locale: str | None, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - # Fix up the locale for `format_time()` by replacing any hyphens with underscores - if locale is None: - locale = "en_US" - else: - locale = _str_replace(locale, "-", "_") + # If `x` is a string, assume it is an ISO time 
string and convert it to a time object + if isinstance(x, str): - # Format the time object to a string using Babel's `format_time()` function - x_formatted = format_time(x, format=time_format_str, locale=locale) + # Convert the ISO time string to a time object + x = _iso_str_to_time(x) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - x_formatted = pattern.replace("{x}", x_formatted) + else: + # Stop if `x` is not a valid time object + _validate_time_obj(x=x) - return x_formatted + # Fix up the locale for `format_time()` by replacing any hyphens with underscores + if locale is None: + locale = "en_US" + else: + locale = _str_replace(locale, "-", "_") + + # Format the time object to a string using Babel's `format_time()` function + x_formatted = format_time(x, format=time_format_str, locale=locale) - return fmt(self, fns=fmt_time_fn, columns=columns, rows=rows) + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": + + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_datetime( @@ -1958,48 +2155,64 @@ def fmt_datetime( # Get the time format string based on the `time_style` value time_format_str = _get_time_format(time_style=time_style) - # Generate a function that will operate on single `x` values in the table body using both - # the date and time format strings - def fmt_datetime_fn( - x: Any, - date_format_str: str = date_format_str, - time_format_str: str = time_format_str, - sep: str = sep, - locale: str | None = locale, - ) -> str: - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_datetime_context, + data=self, + date_format_str=date_format_str, + time_format_str=time_format_str, + sep=sep, + pattern=pattern, + locale=locale, + ) - # From the date and time format strings, create a datetime format string - datetime_format_str = f"{date_format_str}'{sep}'{time_format_str}" + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - # If `x` is a string, assume it is an ISO datetime string and convert it to a datetime object - if isinstance(x, str): - # Convert the ISO datetime string to a datetime object - x = _iso_str_to_datetime(x) +def fmt_datetime_context( + x: Any, + data: GTData, + date_format_str: str, + time_format_str: str, + sep: str, + pattern: str, + locale: str | None, + context: str, +) -> str: + if is_na(data._tbl_data, x): + return x - else: - # Stop if `x` is not a valid datetime object - _validate_datetime_obj(x=x) + # From the date and time format strings, create a datetime format string + datetime_format_str = f"{date_format_str}'{sep}'{time_format_str}" - # Fix up the locale for `format_datetime()` by replacing any hyphens with underscores - if locale is None: - locale = "en_US" - else: - locale = _str_replace(locale, "-", "_") + # If `x` is a string, assume it is an ISO datetime string and convert it to a datetime object + if isinstance(x, str): - # Format the datetime object to a string using Babel's `format_datetime()` function - x_formatted = format_datetime(x, format=datetime_format_str, locale=locale) + # Convert the ISO datetime string to a datetime object + x = _iso_str_to_datetime(x) - # Use a supplied pattern specification to decorate the formatted value - if pattern != "{x}": - 
x_formatted = pattern.replace("{x}", x_formatted) + else: + # Stop if `x` is not a valid datetime object + _validate_datetime_obj(x=x) - return x_formatted + # Fix up the locale for `format_datetime()` by replacing any hyphens with underscores + if locale is None: + locale = "en_US" + else: + locale = _str_replace(locale, "-", "_") + + # Format the datetime object to a string using Babel's `format_datetime()` function + x_formatted = format_datetime(x, format=datetime_format_str, locale=locale) + + # Use a supplied pattern specification to decorate the formatted value + if pattern != "{x}": - return fmt(self, fns=fmt_datetime_fn, columns=columns, rows=rows) + # Escape LaTeX special characters from literals in the pattern + if context == "latex": + pattern = escape_pattern_str_latex(pattern_str=pattern) + + x_formatted = pattern.replace("{x}", x_formatted) + + return x_formatted def fmt_markdown( @@ -2067,19 +2280,31 @@ def fmt_markdown( single string value (or a list of them). """ - # Generate a function that will operate on single `x` values in the table body - def fmt_markdown_fn(x: Any) -> str: - # If the `x` value is a Pandas 'NA', then return the same value - if is_na(self._tbl_data, x): - return x + pf_format = partial( + fmt_markdown_context, + data=self, + ) - x_str: str = str(x) + return fmt_by_context(self, pf_format=pf_format, columns=columns, rows=rows) - x_formatted = _md_html(x_str) - return x_formatted +def fmt_markdown_context( + x: Any, + data: GTData, + context: str, +) -> str: + + if context == "latex": + raise NotImplementedError("fmt_markdown() is not supported in LaTeX.") - return fmt(self, fns=fmt_markdown_fn, columns=columns, rows=rows) + if is_na(data._tbl_data, x): + return x + + x_str: str = str(x) + + x_formatted = _md_html(x_str) + + return x_formatted def fmt_units( @@ -2662,8 +2887,16 @@ def _has_sci_order_zero(value: int | float) -> bool: return (value >= 1 and value < 10) or (value <= -1 and value > -10) or value == 0 -def _context_exp_marks() -> list[str]: - return [" \u00D7 10", ""] +def _context_exp_marks(context: str) -> list[str]: + + if context == "html": + marks = [" \u00D7 10", ""] + elif context == "latex": + marks = [" $\\times$ 10\\textsuperscript{", "}"] + else: + marks = [" \u00D7 10^", ""] + + return marks def _context_exp_str(exp_style: str) -> str: @@ -2683,8 +2916,34 @@ def _context_exp_str(exp_style: str) -> str: return exp_str -def _context_minus_mark() -> str: - return "\u2212" +def _context_minus_mark(context: str) -> str: + + if context == "html": + mark = "\u2212" + else: + mark = "-" + + return mark + + +def _context_percent_mark(context: str) -> str: + + if context == "latex": + mark = "\\%" + else: + mark = "%" + + return mark + + +def _context_dollar_mark(context: str) -> str: + + if context == "latex": + mark = "\\$" + else: + mark = "$" + + return mark def _replace_minus(string: str, minus_mark: str) -> str: @@ -3451,7 +3710,12 @@ def fmt_image( height = "2em" formatter = FmtImage(self._tbl_data, height, width, sep, str(path), file_pattern, encode) - return fmt(self, fns=formatter.to_html, columns=columns, rows=rows) + return fmt( + self, + fns=FormatFns(html=formatter.to_html, latex=formatter.to_latex, default=formatter.to_html), + columns=columns, + rows=rows, + ) @dataclass @@ -3517,6 +3781,15 @@ def to_html(self, val: Any): return span + def to_latex(self, val: Any): + + from ._gt_data import FormatterSkipElement + from warnings import warn + + warn("fmt_image() is not currently implemented in LaTeX output.") + + return 
FormatterSkipElement() + @staticmethod def _apply_pattern(file_pattern: str, files: list[str]) -> list[str]: return [file_pattern.format(file) for file in files] @@ -3875,6 +4148,7 @@ def fmt_nanoplot( # the date and time format strings def fmt_nanoplot_fn( x: Any, + context: str, plot_type: PlotType = plot_type, plot_height: str = plot_height, missing_vals: MissingVals = missing_vals, @@ -3883,6 +4157,10 @@ def fmt_nanoplot_fn( all_single_y_vals: list[int | float] | None = all_single_y_vals, options_plots: dict[str, Any] = options_plots, ) -> str: + + if context == "latex": + raise NotImplementedError("fmt_nanoplot() is not supported in LaTeX.") + # If the `x` value is a Pandas 'NA', then return the same value # We have to pass in a dataframe to this function. Everything action that # requires a dataframe import should go through _tbl_data. @@ -3930,7 +4208,7 @@ def fmt_nanoplot_fn( return nanoplot - return fmt(self, fns=fmt_nanoplot_fn, columns=columns, rows=rows) + return fmt_by_context(self, pf_format=fmt_nanoplot_fn, columns=columns, rows=rows) def _generate_data_vals( @@ -4064,3 +4342,21 @@ def _process_time_stream(data_vals: str) -> list[float]: time_stream_vals = [float(val) for val in time_stream] return time_stream_vals + + +def fmt_by_context( + self: GTSelf, + pf_format: Callable[[Any], str], + columns: SelectExpr, + rows: int | list[int] | None, +) -> GTSelf: + return fmt( + self, + fns=FormatFns( + html=partial(pf_format, context="html"), # type: ignore + latex=partial(pf_format, context="latex"), # type: ignore + default=partial(pf_format, context="html"), # type: ignore + ), + columns=columns, + rows=rows, + ) diff --git a/great_tables/_gt_data.py b/great_tables/_gt_data.py index 697c2404b..17880d6db 100644 --- a/great_tables/_gt_data.py +++ b/great_tables/_gt_data.py @@ -8,6 +8,7 @@ from typing import Any, Callable, Literal, Tuple, TypeVar, Union, overload, TYPE_CHECKING from typing_extensions import Self, TypeAlias +from functools import partial # TODO: move this class somewhere else (even gt_data could work) from ._options import tab_options @@ -24,10 +25,11 @@ to_list, validate_frame, ) +from ._text import BaseText from ._utils import _str_detect, OrderedSet if TYPE_CHECKING: - from ._helpers import Md, Html, UnitStr, Text + from ._helpers import Md, Html, UnitStr from ._locations import Loc T = TypeVar("T") @@ -403,7 +405,7 @@ def _get_column_labels(self) -> list[str | None]: return [x.column_label for x in self._d] # Set column label - def _set_column_labels(self, col_labels: dict[str, str | UnitStr | Text]) -> Self: + def _set_column_labels(self, col_labels: dict[str, str | UnitStr | BaseText]) -> Self: out_cols: list[ColInfo] = [] for x in self._d: new_label = col_labels.get(x.var, None) @@ -762,7 +764,7 @@ def indices_map(self, n: int) -> list[tuple[int, GroupRowInfo]]: class SpannerInfo: spanner_id: str spanner_level: int - spanner_label: str | Text | UnitStr | None = None + spanner_label: str | BaseText | UnitStr | None = None spanner_units: str | None = None spanner_pattern: str | None = None vars: list[str] = field(default_factory=list) diff --git a/great_tables/_helpers.py b/great_tables/_helpers.py index 0e27c82fe..eca10af0c 100644 --- a/great_tables/_helpers.py +++ b/great_tables/_helpers.py @@ -7,7 +7,7 @@ from typing_extensions import TypeAlias, Self -from ._text import Text, Md, Html +from ._text import Md, Html, BaseText import re from dataclasses import dataclass @@ -832,7 +832,7 @@ def to_html(self): return units_str -class UnitStr: +class 
UnitStr(BaseText): def __init__(self, units_str: list[str | UnitDefinitionList]): self.units_str = units_str @@ -850,6 +850,10 @@ def to_html(self) -> str: return built_units + def to_latex(self) -> str: + + raise NotImplementedError("LaTeX conversion of units is not yet supported.") + def _repr_html_(self): return self.to_html() diff --git a/great_tables/_options.py b/great_tables/_options.py index e2d62101a..896136885 100644 --- a/great_tables/_options.py +++ b/great_tables/_options.py @@ -474,6 +474,7 @@ def tab_options( row_striping_include_table_body An option for whether to include the table body when striping rows. + Returns ------- GT diff --git a/great_tables/_spanners.py b/great_tables/_spanners.py index c854c97a5..4039dfa98 100644 --- a/great_tables/_spanners.py +++ b/great_tables/_spanners.py @@ -6,7 +6,7 @@ from ._gt_data import SpannerInfo, Spanners from ._locations import resolve_cols_c from ._tbl_data import SelectExpr -from ._text import Text +from ._text import BaseText from ._utils import OrderedSet, _assert_list_is_subset if TYPE_CHECKING: @@ -19,7 +19,7 @@ def tab_spanner( self: GTSelf, - label: str | Text, + label: str | BaseText, columns: SelectExpr = None, spanners: str | list[str] | None = None, level: int | None = None, @@ -132,7 +132,7 @@ def tab_spanner( if id is None: # The label may contain HTML or Markdown, so we need to extract # it from the Text object - if isinstance(label, Text): + if isinstance(label, BaseText): id = label.text else: id = label @@ -197,7 +197,7 @@ def tab_spanner( else: new_label = unitstr - elif isinstance(label, Text): + elif isinstance(label, BaseText): new_label = label else: diff --git a/great_tables/_text.py b/great_tables/_text.py index 74239776a..33d31f37d 100644 --- a/great_tables/_text.py +++ b/great_tables/_text.py @@ -3,52 +3,145 @@ import html import re from dataclasses import dataclass -from typing import Literal, Union +from typing import Callable import commonmark +class BaseText: + """Abstract base class for text elements""" + + def to_html(self) -> str: + raise NotImplementedError("Method not implemented") + + def to_latex(self) -> str: + raise NotImplementedError("Method not implemented") + + @dataclass -class Text: +class Text(BaseText): + """As-is text""" + text: str + def to_html(self) -> str: + return self.text + + def to_latex(self) -> str: + return self.text + class Md(Text): """Markdown text""" + def to_html(self) -> str: + return _md_html(self.text) + + def to_latex(self) -> str: + return _md_latex(self.text) + class Html(Text): """HTML text""" + def to_html(self) -> str: + return self.text + + def to_latex(self) -> str: + + from ._utils_render_latex import _not_implemented + + _not_implemented( + "Using the `html()` helper function won't convert HTML to LaTeX. Escaping HTML string instead." + ) + + return _latex_escape(self.text) + def _md_html(x: str) -> str: str = commonmark.commonmark(x) return re.sub(r"^
<p>|</p>
\n$", "", str) -def _process_text(x: str | Text | None) -> str: - from great_tables._helpers import UnitStr +def _md_latex(x: str) -> str: + + # TODO: Implement commonmark to LaTeX conversion (through a different library as + # commonmark-py does not support it) + raise NotImplementedError("Markdown to LaTeX conversion is not supported yet") + + +def _process_text(x: str | BaseText | None, context: str = "html") -> str: if x is None: return "" - if isinstance(x, Md): - return _md_html(x.text) - elif isinstance(x, Html): - return x.text - elif isinstance(x, str): - return _html_escape(x) - elif isinstance(x, Text): - return x.text - elif isinstance(x, UnitStr): - return x.to_html() - else: - raise TypeError(f"Invalid type: {type(x)}") - - -def _process_text_id(x: str | Text | None) -> str: + escape_fn = _html_escape if context == "html" else _latex_escape + + if isinstance(x, str): + + return escape_fn(x) + + elif isinstance(x, BaseText): + + return x.to_html() if context == "html" else x.to_latex() + + raise TypeError(f"Invalid type: {type(x)}") + + +def _process_text_id(x: str | BaseText | None) -> str: return _process_text(x) def _html_escape(x: str) -> str: return html.escape(x) + + +def _latex_escape(text: str) -> str: + + latex_escape_regex = "[\\\\&%$#_{}~^]" + text = re.sub(latex_escape_regex, lambda match: "\\" + match.group(), text) + + return text + + +def escape_pattern_str_latex(pattern_str: str) -> str: + + pattern = r"(\{[x0-9]+\})" + + return process_string(pattern_str, pattern, _latex_escape) + + +def process_string(string: str, pattern: str, func: Callable[[str], str]) -> str: + """ + Apply a function to segments of a string that are unmatched by a regex pattern. + + This function splits a string based on a regex pattern to a list of strings, and invokes the + supplied function (in `func=`) to those list elements that *do not* match the pattern (i.e., + the matched components are untouched). Finally, the processed list of text fragments is then + joined back into a single . + + Parameters + ---------- + string + The string to process. + pattern + The regex pattern used for splitting the input string. + func + The function applied to elements that do not match the pattern. + + Returns + ------- + str + A processed string. 
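+
+    Examples
+    --------
+    An illustrative call (mirroring how `escape_pattern_str_latex()` uses this helper:
+    everything except the placeholder matched by the pattern gets LaTeX-escaped):
+
+        process_string("100% of {x}", r"(\{[x0-9]+\})", _latex_escape)  # -> "100\% of {x}"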
+ """ + + # Split the string by the pattern + split_result = re.split(pattern, string) + + # Apply the function to elements that do not match the pattern + processed_list = [func(part) if not re.match(pattern, part) else part for part in split_result] + + # Recombine the list elements to obtain a selectively processed string + combined_str = "".join(processed_list) + + return combined_str diff --git a/great_tables/_utils.py b/great_tables/_utils.py index 5cdfa85e9..c8ca9ae2b 100644 --- a/great_tables/_utils.py +++ b/great_tables/_utils.py @@ -9,7 +9,15 @@ from types import ModuleType from typing import Any, Iterable -from ._tbl_data import PdDataFrame + +from ._tbl_data import PdDataFrame, _set_cell, _get_cell, get_column_names, n_rows +from ._text import _process_text + +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from great_tables._tbl_data import TblData + from great_tables._gt_data import FormatInfo, GTData def _try_import(name: str, pip_install_line: str | None = None) -> ModuleType: @@ -237,3 +245,59 @@ def seq_groups(seq: Iterable[str]) -> Generator[tuple[str, int], None, None]: def is_equal(x: Any, y: Any) -> bool: return x is not None and x == y + + +# TODO: type annotations for `data`, `data_tbl`, `formats`, and the return value are not included +# yet since that would result in a circular import. This will be fixed in the future (when HTML +# escaping is implemented). +def _migrate_unformatted_to_output( + data: GTData, data_tbl: TblData, formats: list[FormatInfo], context: str +) -> GTData: + """ + Escape unformatted cells so they are safe for a specific output context. + """ + + # TODO: This function will eventually be applied to all context types but for now + # it's just used for LaTeX output + if context != "latex": + return data + + all_formatted_cells = [] + + for fmt in formats: + eval_func = getattr(fmt.func, context, fmt.func.default) + if eval_func is None: + raise Exception("Internal Error") + + # Accumulate all formatted cells in the table + all_formatted_cells.append(fmt.cells.resolve()) + + # Deduplicate the list of formatted cells + all_formatted_cells = list(set([item for sublist in all_formatted_cells for item in sublist])) + + # Get all visible cells in the table + all_visible_cells = _get_visible_cells(data=data_tbl) + + # Get the difference between the visible cells and the formatted cells + all_unformatted_cells = list(set(all_visible_cells) - set(all_formatted_cells)) + + # TODO: this currently will only be used for LaTeX (HTML escaping will be performed + # in the future) + + for col, row in all_unformatted_cells: + + # Get the cell value and cast as string + cell_value = _get_cell(data_tbl, row, col) + cell_value_str = str(cell_value) + + result = _process_text(cell_value_str, context=context) + + _set_cell(data._body.body, row, col, result) + + return data + + +# Get a list of tuples for all visible cells in the table +# TODO: define the type of `data` as `TblData` when doing so won't result in a circular import +def _get_visible_cells(data) -> list[tuple[str, int]]: + return [(col, row) for col in get_column_names(data) for row in range(n_rows(data))] diff --git a/great_tables/_utils_render_latex.py b/great_tables/_utils_render_latex.py new file mode 100644 index 000000000..aa6760646 --- /dev/null +++ b/great_tables/_utils_render_latex.py @@ -0,0 +1,683 @@ +from __future__ import annotations + +from itertools import chain +from typing import TYPE_CHECKING +import warnings + +import re +from ._tbl_data import _get_cell, cast_frame_to_string, 
replace_null_frame +from .quarto import is_quarto_render +from ._spanners import spanners_print_matrix +from ._utils import heading_has_subtitle, heading_has_title, seq_groups +from ._utils_render_html import _get_spanners_matrix_height +from ._text import _process_text + +if TYPE_CHECKING: + from ._gt_data import GTData, GroupRowInfo + + +LENGTH_TRANSLATIONS_TO_PX = { + "px": 1.0, + "pt": 4 / 3, + "in": 96.0, + "cm": 37.7952755906, + "emu": 1 / 9525, + "em": 16.0, +} + + +def _not_implemented(msg: str) -> None: + + warnings.warn(msg) + + +def is_css_length_string(x: str) -> bool: + + # This checks if there is a number followed by an optional string (only of letters) + return re.match(r"^[0-9.]+[a-zA-Z]*$", x) is not None + + +def is_number_without_units(x: str) -> bool: + + # This check if the string is a number without any text + return re.match(r"^[0-9.]+$", x) is not None + + +def css_length_has_supported_units(x: str, no_units_valid: bool = True) -> bool: + + # Check if the the string is a valid CSS length string with a text string + + if not is_css_length_string(x): + return False + + # If the string is a number without units, we can return the value of `no_units_valid` + if is_number_without_units(x): + return no_units_valid + + units = get_units_from_length_string(x) + + return units in LENGTH_TRANSLATIONS_TO_PX.keys() + + +def get_units_from_length_string(length: str) -> str: + + # Extract the units from a string that is likely in the form of '123px' or '3.23in' in + # order to return 'px' or 'in' respectively; we'll also need to trim any whitespace and + # convert the string to lowercase + units_str = re.sub(r"[0-9.]+", "", length).strip().lower() + + if units_str == "": + return "px" + + return units_str + + +def get_px_conversion(length: str) -> float: + + input_units = get_units_from_length_string(length) + + if input_units == "px": + return 1.0 + + valid_units = list(LENGTH_TRANSLATIONS_TO_PX.keys()) + + if input_units not in valid_units: + raise ValueError(f"Invalid units: {input_units}") + + return LENGTH_TRANSLATIONS_TO_PX.get(input_units, 0.0) + + +def convert_to_px(length: str) -> float: + + # Extract the units from a string that is likely in the form of '123px' or '3.23in' + units = get_units_from_length_string(length=length) + + # Extract the numeric value from the string and convert to a float + value = float(re.sub(r"[a-zA-Z\s]", "", length)) + + # If the units are already in pixels, we can return the value as-is (w/o rounding) + if units == "px": + return value + + # Get the conversion factor for the units + # - this defaults to 1.0 if the units are 'px' + # - otherwise, it will be a value that converts the units `value` to pixels + px_conversion = get_px_conversion(length=units) + + return round(value * px_conversion) + + +def convert_to_pt(x: str) -> float: + + px_value = convert_to_px(x) + + return px_value * 3 / 4 + + +def latex_heading_row(content: list[str]) -> str: + + return "".join([" & ".join(content) + " \\\\ \n", "\\midrule\\addlinespace[2.5pt]"]) + + +def create_table_start_l(data: GTData, use_longtable: bool) -> str: + """ + Create the table start component for LaTeX output. + + This function generates the LaTeX code that signifies the start of the table. The output is + different depending on whether the table uses the `longtable` environment or not. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. + + Returns + ------- + str + The LaTeX code that signifies the start of the table. 
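+
+    Examples
+    --------
+    Illustrative values, taken from the unit tests added in this changeset for a two-column
+    numeric table with the default ("auto") width; here `built` stands for the result of
+    `GT(df)._build_data(context="latex")`:
+
+        create_table_start_l(data=built, use_longtable=True)
+        # -> "\\begin{longtable}{rr}"
+        create_table_start_l(data=built, use_longtable=False)
+        # -> "\\begin{tabular*}{\\linewidth}{@{\\extracolsep{\\fill}}rr}"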
+ """ + + # Determine if there are any source notes; if any, add a `\setlength` command that will pull up + # the minipage environment for the footer block + + # Get all source notes as a list + source_notes = data._source_notes + + if len(source_notes) > 0: + + longtable_post_length = "\\setlength{\\LTpost}{0mm}\n" + + else: + + longtable_post_length = "" + + # Get the column alignments for the visible columns as a list of `col_defs` + col_defs = [align[0] for align in data._boxhead._get_default_alignments()] + + # If a table width is specified, add an extra column + # space to fill in enough space to match the width + extra_sep = "" + + # Obtain the table width value from the `table_width` options value + table_width = data._options.table_width.value + + if table_width != "auto": + extra_sep = "@{\\extracolsep{\\fill}}" + + # determine string for table width if using tabular* environment + hdr_tabular = "" + + if not use_longtable: + + # we need to use the extracolsep here for tabular* regardless of width + extra_sep = "@{\\extracolsep{\\fill}}" + + if table_width.endswith("%"): + + tw = float(table_width.replace("%", "")) + + tw_frac = tw / 100 + + hdr_tabular = f"\\begin{{tabular*}}{{{tw_frac}\\linewidth}}{{" + + elif table_width.endswith("px"): + + width_in_pt = convert_to_pt(table_width) + hdr_tabular = f"\\begin{{tabular*}}{{{width_in_pt}pt}}{{" + + else: + + hdr_tabular = "\\begin{tabular*}{\\linewidth}{" + + # Generate setup statements for table including default left + # alignments and vertical lines for any stub columns + table_start = "".join( + [ + longtable_post_length if use_longtable else "", + "\\begin{longtable}{" if use_longtable else hdr_tabular, + extra_sep, + "".join(col_defs), + "}", + ] + ) + + return table_start + + +def create_heading_component_l(data: GTData, use_longtable: bool) -> str: + """ + Create the heading component for LaTeX output. + + This function generates the LaTeX code for the heading component of a table which involves the + title and the optional subtitle. There is variation in the output based on whether the table + uses the `longtable` environment or not. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. + + Returns + ------- + str + The LaTeX code for the heading component of the table. + """ + + title = data._heading.title + subtitle = data._heading.subtitle + + line_continuation = "\\\\" + + has_title = heading_has_title(title) + + # If there is no title, then return an empty string + if not has_title: + return "" + + title_str = _process_text(title, context="latex") + + title_row = f"{{\\large {title_str}}}" + + has_subtitle = heading_has_subtitle(subtitle) + + if has_subtitle: + + subtitle_str = _process_text(subtitle, context="latex") + + subtitle_row = f"{{\\small {subtitle_str}}}" + + header_component = f"""\\caption*{{ +{title_row} \\\\ +{subtitle_row} +}} {line_continuation if use_longtable else ""}""" + + else: + + header_component = f"""\\caption*{{ +{title_row} +}} {line_continuation if use_longtable else ""}""" + + return header_component + + +def create_columns_component_l(data: GTData) -> str: + """ + Create the columns component for LaTeX output. + + This function generates the LaTeX code for the columns component of a table which involves the + column headings and the spanners. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. 
+ + Returns + ------- + str + The LaTeX code for the columns component of the table. + """ + + # Determine the finalized number of spanner rows + spanner_row_count = _get_spanners_matrix_height(data=data, omit_columns_row=True) + + # Get the column headings + headings_vars = data._boxhead._get_default_columns() + headings_labels = data._boxhead._get_default_column_labels() + + # Ensure that the heading labels are processed for LaTeX + headings_labels = [_process_text(x, context="latex") for x in headings_labels] + + table_col_headings = "".join(latex_heading_row(content=headings_labels)) + + if spanner_row_count > 0: + + boxhead = data._boxhead + + table_col_spanners = [] + + spanners, _ = spanners_print_matrix( + spanners=data._spanners, + boxhead=boxhead, + include_hidden=False, + ids=False, + omit_columns_row=True, + ) + + # TODO: ensure that spanner IDs are not included in the output (spanner + # labels should be used instead) + + spanner_ids, spanner_col_names = spanners_print_matrix( + spanners=data._spanners, + boxhead=boxhead, + include_hidden=False, + ids=True, + omit_columns_row=True, + ) + + for i in range(len(spanners)): + + spanners_row = spanners[i] + + for k, v in spanners_row.items(): + if v is None: + spanners_row[k] = "" + + spanner_ids_index = spanners_row.values() + spanners_rle = seq_groups(seq=spanner_ids_index) + + group_spans = [[x[1]] + [0] * (x[1] - 1) for x in spanners_rle] + colspans = list(chain(*group_spans)) + level_i_spanners = [] + + for colspan, span_label in zip(colspans, spanners_row.values()): + if colspan > 0: + + if span_label: + span = _process_text(span_label, context="latex") + + else: + span = None + + level_i_spanners.append(span) + + spanner_labs = [] + spanner_lines = [] + span_accumlator = 0 + + for j, _ in enumerate(level_i_spanners): + + if level_i_spanners[j] is None: + + # Get the number of columns to span nothing + span = group_spans[j][0] + spanner_labs.append("" * span) + + elif level_i_spanners[j] is not None: + + # Get the number of columns to span the spanner + span = group_spans[j][0] + + # TODO: Get alignment for spanner, for now it's center (`c`) + + # Get multicolumn statement for spanner + multicolumn_stmt = f"\\multicolumn{{{span}}}{{c}}{{{level_i_spanners[j]}}}" + + spanner_labs.append(multicolumn_stmt) + + # Get cmidrule statement for spanner, it uses 1-based indexing + # and the span is the number of columns to span; we use the `span_accumlator` + # across iterations to adjust the starting index (j) to adjust for previous + # multicolumn spanning values + + begin = j + span_accumlator + 1 + end = j + span_accumlator + span + + cmidrule = f"\\cmidrule(lr){{{begin}-{end}}}" + + span_accumlator += span - 1 + + spanner_lines.append(cmidrule) + + spanner_labs_row = " & ".join(spanner_labs) + " \\\\ \n" + spanner_lines_row = " ".join(spanner_lines) + "\n" + + col_spanners_i = spanner_labs_row + spanner_lines_row + + table_col_spanners.append(col_spanners_i) + + table_col_spanners = "".join(table_col_spanners) + + else: + + table_col_spanners = "" + + columns_component = "\\toprule\n" + table_col_spanners + table_col_headings + + return columns_component + + +def create_body_component_l(data: GTData) -> str: + """ + Create the body component for LaTeX output. + + This function generates the LaTeX code for the body component of a table which involves the + data cells, the row groups, and the stub. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. 
+ + Returns + ------- + str + The LaTeX code for the body component of the table. + """ + + _str_orig_data = cast_frame_to_string(data._tbl_data) + tbl_data = replace_null_frame(data._body.body, _str_orig_data) + + # Get the default column vars + column_vars = data._boxhead._get_default_columns() + + body_rows = [] + + ordered_index: list[tuple[int, GroupRowInfo]] = data._stub.group_indices_map() + + for i, _ in ordered_index: + + body_cells: list[str] = [] + + # Create a body row + for colinfo in column_vars: + cell_content = _get_cell(tbl_data, i, colinfo.var) + cell_str: str = str(cell_content) + + body_cells.append(cell_str) + + # When joining the body cells together, we need to ensure that each item is separated by + # an ampersand and that the row is terminated with a double backslash + body_cells = " & ".join(body_cells) + " \\\\" + + body_rows.append("".join(body_cells)) + + # When joining all the body rows together, we need to ensure that each row is separated by + # newline except for the last + + all_body_rows = "\n".join(body_rows) + + return all_body_rows + + +def create_footer_component_l(data: GTData) -> str: + """ + Create the footer component for LaTeX output. + + This function generates the LaTeX code for the footer component of a table which involves the + source notes. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. + + Returns + ------- + str + The LaTeX code for the footer component of the table. + """ + + # Get all source notes as a list + source_notes = data._source_notes + + if len(source_notes) == 0: + return "" + + # Ensure that the source notes are processed for LaTeX + source_notes = [_process_text(x, context="latex") for x in source_notes] + + # Create a formatted source notes string + source_notes = "\\\\\n".join(source_notes) + "\\\\" + + # Create the footer block + footer_block = f"""\\begin{{minipage}}{{\\linewidth}} +{source_notes} +\\end{{minipage}}""" + + return footer_block + + +def create_table_end_l(use_longtable: bool) -> str: + """ + Create the table end component for LaTeX output. + + This function generates the LaTeX code that signifies the end of the table. The output is + different depending on whether the table uses the `longtable` environment or not. + + Parameters + ---------- + data : GTData + The GTData object that contains all the information about the table. + + Returns + ------- + str + The LaTeX code that signifies the end of the table. 
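+
+    Examples
+    --------
+    Illustrative values, taken from the unit tests added in this changeset:
+
+        create_table_end_l(use_longtable=True)   # -> "\\bottomrule\n\\end{longtable}"
+        create_table_end_l(use_longtable=False)  # -> "\\bottomrule\n\\end{tabular*}"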
+ """ + + table_end = "\\bottomrule\n" + ("\\end{longtable}" if use_longtable else "\\end{tabular*}") + + return table_end + + +def derive_table_width_statement_l(data: GTData, use_longtable: bool) -> str: + + # Get the table width value + tbl_width = data._options.table_width.value + + # Initialize the statement variables LTleft and LTright + sides = ["LTleft", "LTright"] + + # Bookends are not required if a table width is not specified or if using floating table + if tbl_width == "auto" or not use_longtable: + + statement = "" + + elif tbl_width.endswith("%"): + + tw = float(tbl_width.strip("%")) + + side_width = (100 - tw) / 200 + side_width = f"{side_width:.6f}".rstrip("0").rstrip(".") + + statement = "\n".join([f"\\setlength\\{side}{{{side_width}\\linewidth}}" for side in sides]) + + else: + + width_in_pt = convert_to_pt(tbl_width) + + halfwidth_in_pt = f"{width_in_pt / 2:.6f}".rstrip("0").rstrip(".") + + statement = "\n".join( + f"\\setlength\\{side}{{\\dimexpr(0.5\\linewidth - {halfwidth_in_pt}pt)}}" + for side in sides + ) + + return statement + + +def create_fontsize_statement_l(data: GTData) -> str: + + table_font_size = data._options.table_font_size.value + + fs_fmt = "\\fontsize{%3.1fpt}{%3.1fpt}\\selectfont\n" + + if table_font_size.endswith("%"): + + multiple = float(table_font_size.strip("%")) / 100 + fs_statement = fs_fmt % (multiple * 12, multiple * 12 * 1.2) + + elif table_font_size.endswith("pt"): + + size_in_pt = float(table_font_size[:-2]) + fs_statement = fs_fmt % (size_in_pt, size_in_pt * 1.2) + + elif css_length_has_supported_units(table_font_size): + + size_in_pt = convert_to_px(table_font_size) * 0.75 + fs_statement = fs_fmt % (size_in_pt, size_in_pt * 1.2) + + else: + fs_statement = "" + + return fs_statement + + +def create_wrap_start_l(use_longtable: bool, tbl_pos: str | None) -> str: + + if is_quarto_render(): + tbl_pos = "" + + else: + if tbl_pos is None: + tbl_pos = "!t" + + tbl_pos = f"[{tbl_pos}]" + + if use_longtable: + return "\\begingroup" + else: + return f"\\begin{{table}}{tbl_pos}" + + +def create_wrap_end_l(use_longtable: bool) -> str: + + wrap_end = "\\endgroup" if use_longtable else "\\end{table}" + + return wrap_end + + +def _render_as_latex(data: GTData, use_longtable: bool = False, tbl_pos: str | None = None) -> str: + + # Check for styles (not yet supported so warn user) + if data._styles: + _not_implemented("Styles are not yet supported in LaTeX output.") + + # Get list representation of stub layout + stub_layout = data._stub._get_stub_layout(options=data._options) + + # Throw exception if a stub is present in the table + if "rowname" in stub_layout or "group_label" in stub_layout: + + raise NotImplementedError( + "The table stub (row names and/or row groups) are not yet supported in LaTeX output." 
+ ) + + # Determine if row groups are used + has_groups = len(data._stub.group_ids) > 0 + + # Throw exception if row groups are used in LaTeX output (extra case where row + # groups are used but not in the stub) + if has_groups: + + raise NotImplementedError("Row groups are not yet supported in LaTeX output.") + + # Create a LaTeX fragment for the start of the table + table_start = create_table_start_l(data=data, use_longtable=use_longtable) + + # Create the heading component + heading_component = create_heading_component_l(data=data, use_longtable=use_longtable) + + # Create the columns component + columns_component = create_columns_component_l(data=data) + + # Create the body component + body_component = create_body_component_l(data=data) + + # Create the footnotes component + footer_component = create_footer_component_l(data=data) + + # Create a LaTeX fragment for the ending tabular statement + table_end = create_table_end_l(use_longtable=use_longtable) + + # Create a LaTeX fragment for the table width statement + table_width_statement = derive_table_width_statement_l(data=data, use_longtable=use_longtable) + + # Allow user to set a font-size + fontsize_statement = create_fontsize_statement_l(data=data) + + # Create wrapping environment + wrap_start_statement = create_wrap_start_l(use_longtable=use_longtable, tbl_pos=tbl_pos) + wrap_end_statement = create_wrap_end_l(use_longtable=use_longtable) + + # Compose the LaTeX table + if use_longtable: + + finalized_table = f"""{wrap_start_statement} +{table_width_statement} +{fontsize_statement} +{table_start} +{heading_component} +{columns_component} +{body_component} +{table_end} +{footer_component} +{wrap_end_statement} +""" + + else: + + finalized_table = f"""{wrap_start_statement} +{heading_component} +{table_width_statement} +{fontsize_statement} +{table_start} +{columns_component} +{body_component} +{table_end} +{footer_component} +{wrap_end_statement} +""" + + return finalized_table diff --git a/great_tables/gt.py b/great_tables/gt.py index 5715720a9..b62759df0 100644 --- a/great_tables/gt.py +++ b/great_tables/gt.py @@ -10,7 +10,7 @@ from great_tables._body import body_reassemble from great_tables._boxhead import cols_align, cols_label from great_tables._data_color import data_color -from great_tables._export import as_raw_html, save, show +from great_tables._export import as_raw_html, as_latex, save, show from great_tables._formats import ( fmt, fmt_bytes, @@ -63,6 +63,7 @@ from great_tables._substitution import sub_missing, sub_zero from great_tables._tab_create_modify import tab_style from great_tables._tbl_data import _get_cell, n_rows +from great_tables._utils import _migrate_unformatted_to_output from great_tables._utils_render_html import ( _get_table_defs, create_body_component_h, @@ -72,6 +73,7 @@ create_source_notes_component_h, ) + __all__ = ["GT"] @@ -270,6 +272,7 @@ def __init__( save = save show = show as_raw_html = as_raw_html + as_latex = as_latex # ----- @@ -302,7 +305,11 @@ def _build_data(self, context: str) -> Self: # Build the body of the table by generating a dictionary # of lists with cells initially set to nan values built = self._render_formats(context) - # built._body = _migrate_unformatted_to_output(body) + + if context == "latex": + built = _migrate_unformatted_to_output( + data=built, data_tbl=self._tbl_data, formats=self._formats, context=context + ) # built._perform_col_merge() final_body = body_reassemble(built._body, built._stub, built._boxhead) diff --git a/great_tables/quarto.py 
b/great_tables/quarto.py new file mode 100644 index 000000000..5f4e05b16 --- /dev/null +++ b/great_tables/quarto.py @@ -0,0 +1,13 @@ +import os + + +def is_quarto_render() -> bool: + """ + Check if the current environment is a Quarto environment. + + This environment variable check is used to determine if there is currently a Quarto + render occurring. This is useful for determining if certain rendering options should be + enabled or disabled for this specific environment. + """ + + return "QUARTO_BIN_PATH" in os.environ diff --git a/tests/__snapshots__/test_export.ambr b/tests/__snapshots__/test_export.ambr index aa967589e..b47a75c80 100644 --- a/tests/__snapshots__/test_export.ambr +++ b/tests/__snapshots__/test_export.ambr @@ -135,3 +135,69 @@ ''' # --- +# name: test_snap_as_latex + ''' + \begingroup + \setlength\LTleft{\dimexpr(0.5\linewidth - 225pt)} + \setlength\LTright{\dimexpr(0.5\linewidth - 225pt)} + \fontsize{9.0pt}{10.8pt}\selectfont + + \setlength{\LTpost}{0mm} + \begin{longtable}{@{\extracolsep{\fill}}llrrr} + \caption*{ + {\large The \_title\_} \\ + {\small The subtitle} + } \\ + \toprule + \multicolumn{2}{c}{Make \_and\_ Model} & \multicolumn{2}{c}{Performance} & \\ + \cmidrule(lr){1-2} \cmidrule(lr){3-4} + mfr & model & hp & trq & msrp \\ + \midrule\addlinespace[2.5pt] + Ford & GT & 647.0 & 550.0 & \$447,000.00 \\ + Ferrari & 458 Speciale & 597.0 & 398.0 & \$291,744.00 \\ + Ferrari & 458 Spider & 562.0 & 398.0 & \$263,553.00 \\ + Ferrari & 458 Italia & 562.0 & 398.0 & \$233,509.00 \\ + Ferrari & 488 GTB & 661.0 & 561.0 & \$245,400.00 \\ + \bottomrule + \end{longtable} + \begin{minipage}{\linewidth} + Note 1\\ + Note 2\\ + \end{minipage} + \endgroup + + ''' +# --- +# name: test_snap_render_as_latex + ''' + \begingroup + \setlength\LTleft{\dimexpr(0.5\linewidth - 225pt)} + \setlength\LTright{\dimexpr(0.5\linewidth - 225pt)} + \fontsize{9.0pt}{10.8pt}\selectfont + + \setlength{\LTpost}{0mm} + \begin{longtable}{@{\extracolsep{\fill}}llrrr} + \caption*{ + {\large The \_title\_} \\ + {\small The subtitle} + } \\ + \toprule + \multicolumn{2}{c}{Make \_and\_ Model} & \multicolumn{2}{c}{Performance} & \\ + \cmidrule(lr){1-2} \cmidrule(lr){3-4} + mfr & model & hp & trq & msrp \\ + \midrule\addlinespace[2.5pt] + Ford & GT & 647.0 & 550.0 & \$447,000.00 \\ + Ferrari & 458 Speciale & 597.0 & 398.0 & \$291,744.00 \\ + Ferrari & 458 Spider & 562.0 & 398.0 & \$263,553.00 \\ + Ferrari & 458 Italia & 562.0 & 398.0 & \$233,509.00 \\ + Ferrari & 488 GTB & 661.0 & 561.0 & \$245,400.00 \\ + \bottomrule + \end{longtable} + \begin{minipage}{\linewidth} + Note 1\\ + Note 2\\ + \end{minipage} + \endgroup + + ''' +# --- diff --git a/tests/__snapshots__/test_utils_render_latex.ambr b/tests/__snapshots__/test_utils_render_latex.ambr new file mode 100644 index 000000000..813970c3c --- /dev/null +++ b/tests/__snapshots__/test_utils_render_latex.ambr @@ -0,0 +1,98 @@ +# serializer version: 1 +# name: test_snap_render_as_latex + ''' + \begingroup + \setlength\LTleft{\dimexpr(0.5\linewidth - 225pt)} + \setlength\LTright{\dimexpr(0.5\linewidth - 225pt)} + \fontsize{9.0pt}{10.8pt}\selectfont + + \setlength{\LTpost}{0mm} + \begin{longtable}{@{\extracolsep{\fill}}llrrr} + \caption*{ + {\large The \_title\_} \\ + {\small The subtitle} + } \\ + \toprule + \multicolumn{2}{c}{Make \_and\_ Model} & \multicolumn{2}{c}{Performance} & \\ + \cmidrule(lr){1-2} \cmidrule(lr){3-4} + mfr & model & hp & trq & msrp \\ + \midrule\addlinespace[2.5pt] + Ford & GT & 647.0 & 550.0 & \$447,000.00 \\ + Ferrari & 458 Speciale & 
597.0 & 398.0 & \$291,744.00 \\ + Ferrari & 458 Spider & 562.0 & 398.0 & \$263,553.00 \\ + Ferrari & 458 Italia & 562.0 & 398.0 & \$233,509.00 \\ + Ferrari & 488 GTB & 661.0 & 561.0 & \$245,400.00 \\ + \bottomrule + \end{longtable} + \begin{minipage}{\linewidth} + Note 1\\ + Note 2\\ + \end{minipage} + \endgroup + + ''' +# --- +# name: test_snap_render_as_latex_floating_table + ''' + \begin{table}[!t] + \caption*{ + {\large The \_title\_} \\ + {\small The subtitle} + } + + \fontsize{9.0pt}{10.8pt}\selectfont + + \begin{tabular*}{450.0pt}{@{\extracolsep{\fill}}llrrr} + \toprule + \multicolumn{2}{c}{Make \_and\_ Model} & \multicolumn{2}{c}{Performance} & \\ + \cmidrule(lr){1-2} \cmidrule(lr){3-4} + mfr & model & hp & trq & msrp \\ + \midrule\addlinespace[2.5pt] + Ford & GT & 647.0 & 550.0 & \$447,000.00 \\ + Ferrari & 458 Speciale & 597.0 & 398.0 & \$291,744.00 \\ + Ferrari & 458 Spider & 562.0 & 398.0 & \$263,553.00 \\ + Ferrari & 458 Italia & 562.0 & 398.0 & \$233,509.00 \\ + Ferrari & 488 GTB & 661.0 & 561.0 & \$245,400.00 \\ + \bottomrule + \end{tabular*} + \begin{minipage}{\linewidth} + Note 1\\ + Note 2\\ + \end{minipage} + \end{table} + + ''' +# --- +# name: test_snap_render_as_latex_longtable + ''' + \begingroup + \setlength\LTleft{\dimexpr(0.5\linewidth - 225pt)} + \setlength\LTright{\dimexpr(0.5\linewidth - 225pt)} + \fontsize{9.0pt}{10.8pt}\selectfont + + \setlength{\LTpost}{0mm} + \begin{longtable}{@{\extracolsep{\fill}}llrrr} + \caption*{ + {\large The \_title\_} \\ + {\small The subtitle} + } \\ + \toprule + \multicolumn{2}{c}{Make \_and\_ Model} & \multicolumn{2}{c}{Performance} & \\ + \cmidrule(lr){1-2} \cmidrule(lr){3-4} + mfr & model & hp & trq & msrp \\ + \midrule\addlinespace[2.5pt] + Ford & GT & 647.0 & 550.0 & \$447,000.00 \\ + Ferrari & 458 Speciale & 597.0 & 398.0 & \$291,744.00 \\ + Ferrari & 458 Spider & 562.0 & 398.0 & \$263,553.00 \\ + Ferrari & 458 Italia & 562.0 & 398.0 & \$233,509.00 \\ + Ferrari & 488 GTB & 661.0 & 561.0 & \$245,400.00 \\ + \bottomrule + \end{longtable} + \begin{minipage}{\linewidth} + Note 1\\ + Note 2\\ + \end{minipage} + \endgroup + + ''' +# --- diff --git a/tests/test_export.py b/tests/test_export.py index a382912a3..9f2514f9b 100644 --- a/tests/test_export.py +++ b/tests/test_export.py @@ -5,6 +5,7 @@ import time from great_tables import GT, exibble, md +from great_tables.data import gtcars from great_tables._export import _infer_render_target, _create_temp_file_server from pathlib import Path @@ -96,3 +97,23 @@ def test_create_temp_file_server(): r.content.decode() == "abc" thread.join() + + +def test_snap_as_latex(snapshot): + + gt_tbl = ( + GT( + gtcars[["mfr", "model", "hp", "trq", "msrp"]].head(5), + ) + .tab_header(title="The _title_", subtitle="The subtitle") + .tab_spanner(label="Make _and_ Model", columns=["mfr", "model"]) + .tab_spanner(label="Performance", columns=["hp", "trq"]) + .fmt_currency(columns="msrp") + .tab_source_note("Note 1") + .tab_source_note("Note 2") + .tab_options(table_width="600px", table_font_size="12px") + ) + + latex_str_as_latex = gt_tbl.as_latex(use_longtable=True) + + assert snapshot == latex_str_as_latex diff --git a/tests/test_helpers.py b/tests/test_helpers.py index d0ef6215f..f6b5162d5 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -381,6 +381,14 @@ def test_unit_str_unmatched_brackets(): assert res[2] == "" +def test_define_units_latex_raises(): + + with pytest.raises(NotImplementedError) as exc_info: + UnitStr.from_str("a b").to_latex() + + assert "LaTeX conversion of units is 
not yet supported." in exc_info.value.args[0] + + @pytest.mark.parametrize( "value, scale, expected", [("0.5px", 0.5, 0), ["1px", 1, 1], ["2.1px", 2.1, 4]] ) diff --git a/tests/test_text.py b/tests/test_text.py new file mode 100644 index 000000000..18479235e --- /dev/null +++ b/tests/test_text.py @@ -0,0 +1,81 @@ +import pytest + +from great_tables._text import ( + BaseText, + Text, + Md, + Html, + _latex_escape, + escape_pattern_str_latex, + _process_text, +) + + +def test_base_text_class(): + + with pytest.raises(NotImplementedError): + BaseText().to_html() + + with pytest.raises(NotImplementedError): + BaseText().to_latex() + + +def test_text_class(): + + assert Text("
<p>Some Text</p>").to_html() == "<p>Some Text</p>
" + assert Text("__Some Text__").to_latex() == "__Some Text__" + + +def test_md_class(): + + assert Md("**text**").to_html() == "text" + + +def test_html_class(): + + assert Html("text").to_html() == "text" + assert Html("text").to_latex() == "text" + + +def test_latex_escape(): + + assert _latex_escape("a & b") == "a \\& b" + assert _latex_escape("a & b & c") == "a \\& b \\& c" + assert _latex_escape("\\a_\\d") == "\\\\a\\_\\\\d" + + +def test_escape_pattern_str_latex(): + + assert escape_pattern_str_latex("{x}") == "{x}" + assert escape_pattern_str_latex("a $_{1} %ab {2}") == "a \\$\\_{1} \\%ab {2}" + assert escape_pattern_str_latex("a{b}c") == "a\\{b\\}c" + + +def test_process_text_html(): + + assert _process_text("a & ", context="html") == "a & <b>" + assert _process_text(Text("a & "), context="html") == "a & " + assert _process_text(Md("**a** & "), context="html") == "a & " + assert _process_text(Html("**a** & "), context="html") == "**a** & " + assert _process_text(None, context="html") == "" + + +def test_process_text_latex(): + + assert _process_text("a & _b_", context="latex") == "a \\& \\_b\\_" + assert _process_text(Text("\\_\\$"), context="latex") == "\\_\\$" + assert _process_text(Html("**a** & "), context="latex") == "**a** \\& " + assert _process_text(None, context="latex") == "" + + with pytest.raises(NotImplementedError) as exc_info: + _process_text(Md("**a** & "), context="latex") + + assert "Markdown to LaTeX conversion is not supported yet" in exc_info.value.args[0] + + +def test_process_text_raises(): + + with pytest.raises(TypeError) as exc_info: + _process_text(1, context="html") # type: ignore + + assert "Invalid type: " in exc_info.value.args[0] diff --git a/tests/test_utils.py b/tests/test_utils.py index df6323703..60d179431 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -1,5 +1,8 @@ from collections.abc import Generator import pytest + +from great_tables import GT, exibble +from great_tables._tbl_data import is_na from great_tables._utils import ( _assert_list_is_subset, _assert_str_in_set, @@ -8,6 +11,7 @@ _collapse_list_elements, _insert_into_list, _match_arg, + _migrate_unformatted_to_output, OrderedSet, _str_scalar_to_list, heading_has_subtitle, @@ -175,3 +179,41 @@ def test_seq_groups_raises(): with pytest.raises(RuntimeError) as exc_info: next(seq_groups([])) assert "StopIteration" in str(exc_info.value) + + +def test_migrate_unformatted_to_output_latex(): + + gt_tbl = GT(exibble.head(2)).fmt_number(columns="num", decimals=3) + + # After rendering the data cells all the unformatted cells will be NA values in the + # body of the table + rendered = gt_tbl._render_formats(context="latex") + + assert is_na(rendered._body.body, rendered._body.body["char"].tolist()).tolist() == [True, True] + + # Migrate unformatted data to their corresponding data cells, the expectation is that + # unformatted cells will no longer be NA but have the values from the original data + migrated = _migrate_unformatted_to_output( + data=rendered, data_tbl=rendered._tbl_data, formats=rendered._formats, context="latex" + ) + + assert migrated._body.body["char"].tolist() == ["apricot", "banana"] + + +def test_migrate_unformatted_to_output_html(): + + gt_tbl = GT(exibble.head(2)).fmt_number(columns="num", decimals=3) + + # After rendering the data cells all the unformatted cells will be NA values in the + # body of the table + rendered = gt_tbl._render_formats(context="html") + + assert is_na(rendered._body.body, rendered._body.body["char"].tolist()).tolist() == [True, True] 
+ + # For HTML output, the `_migrate_unformatted_to_output()` has not been implemented yet so + # we expect the same output as the input (NA values for unformatted cells) + migrated = _migrate_unformatted_to_output( + data=rendered, data_tbl=rendered._tbl_data, formats=rendered._formats, context="html" + ) + + assert is_na(migrated._body.body, migrated._body.body["char"].tolist()).tolist() == [True, True] diff --git a/tests/test_utils_render_latex.py b/tests/test_utils_render_latex.py new file mode 100644 index 000000000..3d5d0fee9 --- /dev/null +++ b/tests/test_utils_render_latex.py @@ -0,0 +1,561 @@ +import pytest +from unittest import mock +import pandas as pd +import os + +from great_tables import GT, exibble +from great_tables.data import gtcars + +from great_tables._utils_render_latex import ( + is_css_length_string, + is_number_without_units, + css_length_has_supported_units, + get_px_conversion, + get_units_from_length_string, + convert_to_px, + convert_to_pt, + create_wrap_start_l, + create_fontsize_statement_l, + create_heading_component_l, + create_body_component_l, + create_columns_component_l, + create_footer_component_l, + create_wrap_end_l, + create_table_end_l, + create_table_start_l, + derive_table_width_statement_l, + _render_as_latex, +) + + +@pytest.fixture +def gt_tbl(): + return GT(pd.DataFrame({"x": [1, 2], "y": [4, 5]})) + + +@pytest.fixture +def gt_tbl_dec(): + return GT(pd.DataFrame({"x": [1.52, 2.23], "y": [4.75, 5.23]})) + + +@pytest.fixture +def gt_tbl_sci(): + return GT(pd.DataFrame({"x": [465633.46, -0.00000000345], "y": [4.509, 176.23]})) + + +@pytest.fixture +def gt_tbl_pct(): + return GT(pd.DataFrame({"x": [0.53, 0.0674], "y": [0.17, 0.32]})) + + +@pytest.fixture +def gt_tbl_dttm(): + return GT( + pd.DataFrame( + { + "date": ["2023-08-12", "2020-11-17"], + "time": ["09:21:23", "22:45:02"], + "dttm": ["2023-08-12 09:21:23", "2020-11-17 22:45:02"], + } + ) + ) + + +def test_is_css_length_string(): + + assert is_css_length_string("12.5pt") + assert is_css_length_string("12.5px") + assert is_css_length_string("12.5") + assert is_css_length_string("12.5units") + + +def test_is_number_without_units(): + + assert is_number_without_units("12.5") + assert not is_number_without_units("12.5pt") + + +def test_css_length_has_supported_units(): + + assert css_length_has_supported_units("12.5pt") + assert css_length_has_supported_units("12.5px") + assert css_length_has_supported_units("12.5") + assert css_length_has_supported_units("12.5pt", no_units_valid=False) + assert css_length_has_supported_units("12.5px", no_units_valid=False) + assert not css_length_has_supported_units("12.5", no_units_valid=False) + assert not css_length_has_supported_units("12.8units") + assert not css_length_has_supported_units("units12.8") + + +def test_get_units_from_length_string(): + + assert get_units_from_length_string("12.5pt") == "pt" + assert get_units_from_length_string("") == "px" + + +def test_get_px_conversion_val(): + + assert get_px_conversion(length="2343.23pt") == 4 / 3 + assert get_px_conversion(length="43.2px") == 1.0 + + +def test_get_px_conversion_val_raises(): + + with pytest.raises(ValueError) as exc_info: + get_px_conversion(length="12.8bolts") + + assert "Invalid units: bolts" in exc_info.value.args[0] + + +def test_convert_to_px(): + + assert convert_to_px("12.5pt") == 17.0 + assert convert_to_px("12.5px") == 12.5 + + +def test_convert_to_pt(): + + assert convert_to_pt("16px") == 12.0 + + +def test_create_fontsize_statement_l(gt_tbl: GT): + + assert 
create_fontsize_statement_l(gt_tbl) == "\\fontsize{12.0pt}{14.4pt}\\selectfont\n" + + +def test_create_fontsize_statement_l_pt(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_font_size="18.2pt") + + assert create_fontsize_statement_l(gt_tbl_new) == "\\fontsize{18.2pt}{21.8pt}\\selectfont\n" + + +def test_create_fontsize_statement_l_px(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_font_size="11px") + + assert create_fontsize_statement_l(gt_tbl_new) == "\\fontsize{8.2pt}{9.9pt}\\selectfont\n" + + +def test_create_fontsize_statement_l_pct(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_font_size="50%") + + assert create_fontsize_statement_l(gt_tbl_new) == "\\fontsize{6.0pt}{7.2pt}\\selectfont\n" + + +def test_create_fontsize_statement_l_cm(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_font_size="0.6cm") + + assert create_fontsize_statement_l(gt_tbl_new) == "\\fontsize{17.2pt}{20.7pt}\\selectfont\n" + + +def test_create_fontsize_statement_l_unknown_unit(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_font_size="1span") + + assert create_fontsize_statement_l(gt_tbl_new) == "" + + +def test_derive_table_width_statement_l_px_lt(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_width="500px") + + assert ( + derive_table_width_statement_l(gt_tbl_new, use_longtable=True) + == "\\setlength\\LTleft{\\dimexpr(0.5\\linewidth - 187.5pt)}\n\\setlength\\LTright{\\dimexpr(0.5\\linewidth - 187.5pt)}" + ) + + +def test_derive_table_width_statement_l_pct_lt(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_width="45%") + + assert ( + derive_table_width_statement_l(gt_tbl_new, use_longtable=True) + == "\\setlength\\LTleft{0.275\\linewidth}\n\\setlength\\LTright{0.275\\linewidth}" + ) + + +def test_derive_table_width_statement_l_px_no_lt(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_options(table_width="500px") + + assert derive_table_width_statement_l(gt_tbl_new, use_longtable=False) == "" + + +def test_create_fontsize_statement_l_settings(): + + gt_tbl = GT(exibble).tab_options(table_font_size="18.5px") + + assert create_fontsize_statement_l(gt_tbl) == "\\fontsize{13.9pt}{16.6pt}\\selectfont\n" + + +def test_create_heading_component_l(): + + gt_tbl_no_heading = GT(exibble) + gt_tbl_title = GT(exibble).tab_header(title="Title") + gt_tbl_title_subtitle = GT(exibble).tab_header(title="Title", subtitle="Subtitle") + + assert create_heading_component_l(gt_tbl_no_heading, use_longtable=False) == "" + assert ( + create_heading_component_l(gt_tbl_title, use_longtable=False) + == "\\caption*{\n{\\large Title}\n} " + ) + assert ( + create_heading_component_l(gt_tbl_title_subtitle, use_longtable=False) + == "\\caption*{\n{\\large Title} \\\\\n{\\small Subtitle}\n} " + ) + + +def test_create_columns_component_l_simple(): + + gt_tbl = GT(exibble) + + assert ( + create_columns_component_l(data=gt_tbl) + == "\\toprule\nnum & char & fctr & date & time & datetime & currency & row & group \\\\ \n\\midrule\\addlinespace[2.5pt]" + ) + + +def test_create_columns_component_l_simple_hidden_cols(): + + gt_tbl = GT(exibble).cols_hide(columns=["char", "date"]) + + assert ( + create_columns_component_l(data=gt_tbl) + == "\\toprule\nnum & fctr & time & datetime & currency & row & group \\\\ \n\\midrule\\addlinespace[2.5pt]" + ) + + +def test_create_columns_component_l_one_spanner(): + + gt_tbl = GT(exibble).tab_spanner(label="Spanner", columns=["num", "char"]) + + assert ( + create_columns_component_l(data=gt_tbl) + == "\\toprule\n\\multicolumn{2}{c}{Spanner} & \\\\ \n\\cmidrule(lr){1-2}\nnum & 
char & fctr & date & time & datetime & currency & row & group \\\\ \n\\midrule\\addlinespace[2.5pt]" + ) + + +def test_create_columns_component_l_adjacent_spanners_hiding(): + + gt_tbl = ( + GT(exibble) + .tab_spanner(label="Spanner 1", columns=["num", "char"]) + .tab_spanner(label="Spanner 2", columns=["date", "time"]) + .tab_spanner(label="Spanner 3", columns=["currency", "row"]) + .cols_hide(columns="row") + ) + + assert ( + create_columns_component_l(data=gt_tbl) + == "\\toprule\n\\multicolumn{2}{c}{Spanner 1} & & \\multicolumn{2}{c}{Spanner 2} & & \\multicolumn{1}{c}{Spanner 3} & \\\\ \n\\cmidrule(lr){1-2} \\cmidrule(lr){4-5} \\cmidrule(lr){7-7}\nnum & char & fctr & date & time & datetime & currency & group \\\\ \n\\midrule\\addlinespace[2.5pt]" + ) + + +def test_create_columns_component_l_many_spanners(): + + gt_tbl = ( + GT(exibble) + .tab_spanner(label="Spanner 1", columns=["num", "char"]) + .tab_spanner(label="Spanner 2", columns=["date", "time"]) + .tab_spanner(label="Spanner 3", columns=["currency", "row"]) + .tab_spanner(label="Spanner Above 1", columns=["char", "fctr"]) + .tab_spanner(label="Spanner Above 2", columns=["time", "datetime"]) + ) + + assert ( + create_columns_component_l(data=gt_tbl) + == "\\toprule\n & \\multicolumn{2}{c}{Spanner Above 1} & & \\multicolumn{2}{c}{Spanner Above 2} & \\\\ \n\\cmidrule(lr){2-3} \\cmidrule(lr){5-6}\n\\multicolumn{2}{c}{Spanner 1} & & \\multicolumn{2}{c}{Spanner 2} & & \\multicolumn{2}{c}{Spanner 3} & \\\\ \n\\cmidrule(lr){1-2} \\cmidrule(lr){4-5} \\cmidrule(lr){7-8}\nnum & char & fctr & date & time & datetime & currency & row & group \\\\ \n\\midrule\\addlinespace[2.5pt]" + ) + + +def test_create_body_component_l_simple(gt_tbl: GT): + + assert create_body_component_l(data=gt_tbl) == "1 & 4 \\\\\n2 & 5 \\\\" + + +def test_create_footer_component_one_note(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_source_note(source_note="Source Note.") + + assert ( + create_footer_component_l(gt_tbl_new) + == "\\begin{minipage}{\\linewidth}\nSource Note.\\\\\n\\end{minipage}" + ) + + +def test_create_footer_component_two_notes(gt_tbl: GT): + + gt_tbl_new = gt_tbl.tab_source_note(source_note="Source Note 1.").tab_source_note( + source_note="Source Note 2." 
+    )
+
+    assert (
+        create_footer_component_l(gt_tbl_new)
+        == "\\begin{minipage}{\\linewidth}\nSource Note 1.\\\\\nSource Note 2.\\\\\n\\end{minipage}"
+    )
+
+
+def test_create_footer_component_no_notes(gt_tbl: GT):
+
+    assert create_footer_component_l(gt_tbl) == ""
+
+
+def test_create_body_component_l_fmt_number(gt_tbl_dec: GT):
+
+    gt_tbl_built = gt_tbl_dec.fmt_number(
+        columns="x", rows=0, scale_by=-1, decimals=3, pattern="{x} _"
+    )._build_data(context="latex")
+
+    assert create_body_component_l(data=gt_tbl_built) == "-1.520 \\_ & 4.75 \\\\\n2.23 & 5.23 \\\\"
+
+
+def test_create_body_component_l_fmt_integer(gt_tbl_dec: GT):
+
+    gt_tbl_built = gt_tbl_dec.fmt_integer(
+        columns="x", rows=0, scale_by=-1, pattern="{x} _"
+    )._build_data(context="latex")
+
+    assert create_body_component_l(data=gt_tbl_built) == "-2 \\_ & 4.75 \\\\\n2.23 & 5.23 \\\\"
+
+
+def test_create_body_component_l_fmt_scientific(gt_tbl_sci: GT):
+
+    gt_tbl_built = gt_tbl_sci.fmt_scientific(columns="x", pattern="{x} _")._build_data(
+        context="latex"
+    )
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "4.66 $\\times$ 10\\textsuperscript{5} \\_ & 4.509 \\\\\n-3.45 $\\times$ 10\\textsuperscript{-9} \\_ & 176.23 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_percent(gt_tbl_pct: GT):
+
+    gt_tbl_built = gt_tbl_pct.fmt_percent(columns="x", pattern="{x} _")._build_data(context="latex")
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "53.00\\% \\_ & 0.17 \\\\\n6.74\\% \\_ & 0.32 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_currency(gt_tbl_dec: GT):
+
+    gt_tbl_built = gt_tbl_dec.fmt_currency(columns="x", pattern="{x} _")._build_data(
+        context="latex"
+    )
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "\\$1.52 \\_ & 4.75 \\\\\n\\$2.23 \\_ & 5.23 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_bytes(gt_tbl_sci: GT):
+
+    gt_tbl_built = gt_tbl_sci.fmt_bytes(columns="x", pattern="{x} _")._build_data(context="latex")
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "465.6 kB \\_ & 4.509 \\\\\n0 B \\_ & 176.23 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_date(gt_tbl_dttm: GT):
+
+    gt_tbl_built = gt_tbl_dttm.fmt_date(
+        columns="date", date_style="wday_month_day_year", pattern="{x} _"
+    )._build_data(context="latex")
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "Saturday, August 12, 2023 \\_ & 09:21:23 & 2023-08-12 09:21:23 \\\\\nTuesday, November 17, 2020 \\_ & 22:45:02 & 2020-11-17 22:45:02 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_time(gt_tbl_dttm: GT):
+
+    gt_tbl_built = gt_tbl_dttm.fmt_time(
+        columns="time", time_style="h_m_s_p", pattern="{x} _"
+    )._build_data(context="latex")
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "2023-08-12 & 9:21:23 AM \\_ & 2023-08-12 09:21:23 \\\\\n2020-11-17 & 10:45:02 PM \\_ & 2020-11-17 22:45:02 \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_datetime(gt_tbl_dttm: GT):
+
+    gt_tbl_built = gt_tbl_dttm.fmt_datetime(
+        columns="dttm", date_style="wday_month_day_year", time_style="h_m_s_p", pattern="{x} _"
+    )._build_data(context="latex")
+
+    assert (
+        create_body_component_l(data=gt_tbl_built)
+        == "2023-08-12 & 09:21:23 & Saturday, August 12, 2023 9:21:23 AM \\_ \\\\\n2020-11-17 & 22:45:02 & Tuesday, November 17, 2020 10:45:02 PM \\_ \\\\"
+    )
+
+
+def test_create_body_component_l_fmt_roman(gt_tbl_dec: GT):
+
+    gt_tbl_built = gt_tbl_dec.fmt_roman(columns="x", rows=0, pattern="{x} _")._build_data(
+        context="latex"
+    )
+
+    assert create_body_component_l(data=gt_tbl_built) == "II \\_ & 4.75 \\\\\n2.23 & 5.23 \\\\"
+
+
+def test_create_wrap_start():
+
+    assert create_wrap_start_l(use_longtable=False, tbl_pos=None) == "\\begin{table}[!t]"
+    assert create_wrap_start_l(use_longtable=False, tbl_pos="!b") == "\\begin{table}[!b]"
+    assert create_wrap_start_l(use_longtable=True, tbl_pos=None) == "\\begingroup"
+
+
+@mock.patch.dict(os.environ, {"QUARTO_BIN_PATH": "1"}, clear=True)
+def test_create_wrap_start_quarto():
+
+    assert create_wrap_start_l(use_longtable=False, tbl_pos="!t") == "\\begin{table}"
+    assert create_wrap_start_l(use_longtable=True, tbl_pos="!t") == "\\begingroup"
+
+
+def test_create_wrap_end_l():
+
+    assert create_wrap_end_l(use_longtable=False) == "\\end{table}"
+    assert create_wrap_end_l(use_longtable=True) == "\\endgroup"
+
+
+def test_create_table_end_l_longtable():
+
+    assert create_table_end_l(use_longtable=False) == "\\bottomrule\n\\end{tabular*}"
+    assert create_table_end_l(use_longtable=True) == "\\bottomrule\n\\end{longtable}"
+
+
+def test_create_table_start_l_longtable(gt_tbl: GT):
+
+    gt_tbl_no_source_notes = gt_tbl._build_data(context="latex")
+    gt_tbl_source_notes = gt_tbl.tab_source_note(source_note="Note")._build_data(context="latex")
+
+    assert (
+        create_table_start_l(
+            data=gt_tbl_no_source_notes,
+            use_longtable=True,
+        )
+        == "\\begin{longtable}{rr}"
+    )
+
+    assert (
+        create_table_start_l(
+            data=gt_tbl_source_notes,
+            use_longtable=True,
+        )
+        == "\\setlength{\\LTpost}{0mm}\n\\begin{longtable}{rr}"
+    )
+
+
+def test_create_table_start_l_float_tbl_pct(gt_tbl: GT):
+
+    gt_tbl_new = gt_tbl.tab_options(table_width="50%")
+
+    assert (
+        create_table_start_l(
+            data=gt_tbl_new,
+            use_longtable=False,
+        )
+        == "\\begin{tabular*}{0.5\\linewidth}{@{\\extracolsep{\\fill}}rr}"
+    )
+
+
+def test_create_table_start_l_float_tbl_px(gt_tbl: GT):
+
+    gt_tbl_new = gt_tbl.tab_options(table_width="500px")
+
+    assert (
+        create_table_start_l(
+            data=gt_tbl_new,
+            use_longtable=False,
+        )
+        == "\\begin{tabular*}{375.0pt}{@{\\extracolsep{\\fill}}rr}"
+    )
+
+
+def test_create_table_start_l_float_tbl_auto(gt_tbl: GT):
+
+    gt_tbl_new = gt_tbl.tab_options(table_width="auto")
+
+    assert (
+        create_table_start_l(
+            data=gt_tbl_new,
+            use_longtable=False,
+        )
+        == "\\begin{tabular*}{\\linewidth}{@{\\extracolsep{\\fill}}rr}"
+    )
+
+
+def test_snap_render_as_latex_longtable(snapshot):
+
+    gt_tbl = (
+        GT(
+            gtcars[["mfr", "model", "hp", "trq", "msrp"]].head(5),
+        )
+        .tab_header(title="The _title_", subtitle="The subtitle")
+        .tab_spanner(label="Make _and_ Model", columns=["mfr", "model"])
+        .tab_spanner(label="Performance", columns=["hp", "trq"])
+        .fmt_currency(columns="msrp")
+        .tab_source_note("Note 1")
+        .tab_source_note("Note 2")
+        .tab_options(table_width="600px", table_font_size="12px")
+    )
+
+    latex_str = _render_as_latex(
+        data=gt_tbl._build_data(context="latex"), use_longtable=True, tbl_pos=None
+    )
+
+    assert snapshot == latex_str
+
+
+def test_snap_render_as_latex_floating_table(snapshot):
+
+    gt_tbl = (
+        GT(
+            gtcars[["mfr", "model", "hp", "trq", "msrp"]].head(5),
+        )
+        .tab_header(title="The _title_", subtitle="The subtitle")
+        .tab_spanner(label="Make _and_ Model", columns=["mfr", "model"])
+        .tab_spanner(label="Performance", columns=["hp", "trq"])
+        .fmt_currency(columns="msrp")
+        .tab_source_note("Note 1")
+        .tab_source_note("Note 2")
+        .tab_options(table_width="600px", table_font_size="12px")
+    )
+
+    latex_str = _render_as_latex(
+        data=gt_tbl._build_data(context="latex"), use_longtable=False, tbl_pos=None
+    )
+
+    assert snapshot == latex_str
+
+
+def test_render_as_latex_stub_raises():
+    gt_tbl = GT(exibble, rowname_col="row")
+    with pytest.raises(NotImplementedError) as exc_info:
+        _render_as_latex(data=gt_tbl._build_data(context="latex"))
+
+    assert (
+        "The table stub (row names and/or row groups) are not yet supported in LaTeX output."
+        in exc_info.value.args[0]
+    )
+
+
+def test_render_as_latex_rowgroup_raises():
+    gt_tbl = GT(exibble, groupname_col="group")
+    with pytest.raises(NotImplementedError) as exc_info:
+        _render_as_latex(data=gt_tbl._build_data(context="latex"))
+
+    assert "Row groups are not yet supported in LaTeX output." in exc_info.value.args[0]