"""
Contains several functions to prepare and format tables for LaTeX documents.
"""
# Author: Joao Fonseca <jpmrfonseca@gmail.com>
# License: MIT
from itertools import product
import numpy as np
import pandas as pd
def _check_indices(table_index, indices):
"""Formats the ``indices`` parameter passed."""
if indices is None:
indices = table_index.to_frame().to_dict("list")
# Check indices - list
if type(indices) is list:
if all([type(i) in [str, int] for i in indices]):
indices_ = {table_index.names[0]: {i: i for i in indices}}
elif all([type(i) is list for i in indices]):
indices_ = {
name: {i: i for i in values}
for name, values in zip(table_index.names, indices)
}
elif all([type(i) is dict for i in indices]):
indices_ = {
name: values for name, values in zip(table_index.names, indices)
}
# Check indices - dict
if type(indices) is dict:
if all([type(i) is str for i in list(indices.values())]):
index_name = table_index.names[0] if table_index.names[0] is not None else 0
indices_ = {index_name: indices}
elif all([type(i) is list for i in list(indices.values())]):
indices_ = {
name: {i: i for i in values} for name, values in indices.items()
}
elif all([type(i) is dict for i in list(indices.values())]):
indices_ = indices
# Replace index name if there's only a single index with no name
if len(indices_) == 1 and None in indices_.keys():
indices_[0] = indices_[None]
indices_.pop(None)
return indices_
def _make_bold(row, maximum=True, decimals=2, threshold=None, with_sem=False):
"""
Make bold the lowest or highest value(s).
with_sem returns an incomplete textbf latex function and a mask array.
This function should be used in a pandas dataframe using the .apply method.
"""
row = round(row, decimals)
if threshold is None:
val = row.max() if maximum else row.min()
mask = row == val
else:
mask = (row > threshold) if maximum else (row < threshold)
formatter = "{0:.%sf}" % decimals
row = row.apply(lambda el: formatter.format(el))
row[mask] = [
"\\textbf{%s" % v if with_sem else "\\textbf{%s}" % v for v in row[mask]
]
# Return mask only if function is being used to generate
# a table with sem values
if with_sem:
return row, mask
else:
return row
# [docs]
def make_bold(table, maximum=True, threshold=None, decimals=2, axis=1):
    """
    Boldface (LaTeX ``\\textbf``) the extreme values, or the values beyond
    ``threshold``, of every row or column of a table.

    Parameters
    ----------
    table : pd.DataFrame
        Dataframe with results statistics. Must not contain non-indexed metadata.
        Supports both single and multi index.

    maximum : bool, default=True
        Whether to look for the highest or lowest values:

        - If True and ``threshold`` is None, boldfaces the highest value in each
          row/column.
        - If False and ``threshold`` is None, boldfaces the lowest value in each
          row/column.
        - If True and ``threshold`` is not None, boldfaces all values above the
          given threshold.
        - If False and ``threshold`` is not None, boldfaces all values below the
          given threshold.

    threshold : int or float, default=None
        Threshold to boldface values. If None, the highest/lowest value of each
        row or column is boldfaced. If not None, boldfaces all values above or
        below ``threshold``.

    decimals : int, default=2
        Number of decimal places to round each value to.

    axis : {0 or 'index', 1 or 'columns'}, default=1
        Axis along which the function is applied:

        - 0 or 'index': apply function to each column.
        - 1 or 'columns': apply function to each row.

    Returns
    -------
    formatted : pd.DataFrame
        Result of applying the formatting to every row/column of ``table``;
        values are strings.
    """

    def _format_line(values):
        # ``values`` is a single row or column, depending on ``axis``.
        return _make_bold(
            values, maximum=maximum, decimals=decimals, threshold=threshold
        )

    return table.apply(_format_line, axis=axis)
# [docs]
def make_mean_sem_table(
mean_vals,
sem_vals=None,
make_bold=False,
maximum=True,
threshold=None,
decimals=2,
axis=1,
):
"""
Generate table with rounded decimals, bold maximum/minimum values or values
above/below a given threshold, and combine mean and sem values.
Parameters
----------
mean_vals : pd.DataFrame
Dataframe with results statistics. Must not
contain non-indexed metadata. Supports both single and multi index.
sem_vals : {pd.DataFrame or np.ndarray}, default=None
Dataframe with standard errors of the means. If it is a DataFrame, must not
contain non-indexed metadata. Supports both single and multi index.
make_bold : bool, default=False
If True, make bold the lowest or highest values, or values lower than, or higher
than the passed value in ``threshold`` per row or column. If False, the
parameters ``maximum``, ``threshold`` and ``axis`` are ignored.
maximum : bool, default=True
Whether to look for the highest or lowest values:
- If True and ``threshold`` is None, boldfaces the highest value in each
row/column.
- If False and ``threshold`` is None, boldfaces the lowest value in each
row/column.
- If True and ``threshold`` is not None, boldfaces all values above the given
threshold.
- If False and ``threshold`` is not None, boldfaces all values below the given
threshold.
threshold : int or float, default=None
Threshold to boldface values. If None, one value will be boldfaced per row or
column. If not None, boldfaces all values above or below ``threshold``.
decimals : int, default=2
Number of decimal places to round each value to.
axis : {0 or 'index', 1 or 'columns'}, default=1
Axis along which the function is applied:
- 0 or 'index': apply function to column.
- 1 or 'columns': apply function to each row.
Returns
-------
scores : pd.DataFrame
Dataframe with the specified formatting.
"""
if sem_vals is not None:
if type(sem_vals) is np.ndarray:
sem_vals = pd.DataFrame(
sem_vals, index=mean_vals.index, columns=mean_vals.columns
)
scores = (
mean_vals.map(("{:,.%sf}" % decimals).format)
+ r" $\pm$ "
+ sem_vals.map(("{:,.%sf}" % decimals).format)
)
else:
scores = mean_vals.map(("{:,.%sf}" % decimals).format)
if make_bold:
mask = mean_vals.apply(
lambda row: _make_bold(row, maximum, decimals, threshold, with_sem=True)[1],
axis=axis,
).values
scores.iloc[:, :] = np.where(mask, "\\textbf{" + scores + "}", scores)
return scores
# [docs]
def export_longtable(table, path=None, caption=None, label=None, index=True):
"""
Exports a pandas dataframe to LaTeX (longtable) format.
This function replaces ``df.to_latex`` when there are latex commands in
the table. Requires ``\\usepackage{booktabs}`` and ``\\usepackage{longtable}`` in the
LaTeX preamble.
.. warning::
Do not pass a table with column names, index names or values containing an
underscore ("_"), as it will result in an error in LaTeX. If you wish to have
underscores in the LaTeX table, add a backslash before the underscore.
Parameters
----------
table : pd.DataFrame
Dataframe with results statistics. Must not contain non-indexed metadata.
Supports both single and multi index.
path : str, default=None
File path to write to. If None, the output is returned as a string.
caption : str or tuple, default=None
Tuple (full_caption, short_caption), which results in
``\\caption[short_caption]{full_caption}``; if a single string is passed, no
short caption will be set.
label : str, default=None
The LaTeX label to be placed inside ``\\label{}`` in the output. This is used
with ``\\ref{}`` in the main ``.tex`` file.
index : bool, default=True
Write row names (index).
Returns
-------
tex_table : str or None
If ``path`` is None, returns the result as a string. Otherwise returns None.
"""
tex_table = (
table.to_latex(
longtable=True,
caption=caption,
label=label,
index=index,
column_format="c" * table.shape[1],
)
.replace(r"\textbackslash ", "\\")
.replace(r"\{", "{")
.replace(r"\}", "}")
.replace(r"\$", "$")
)
if path is not None:
open(path, "w").write(tex_table)
else:
return tex_table