# Author: Georgios Douzas <gdouzas@icloud.com>
# Joao Fonseca <jpmrfonseca@gmail.com>
# License: MIT
import numpy as np
import pandas as pd
from os.path import join, dirname, abspath
def generate_mean_std_tbl(mean_vals, std_vals):
    """Generate table that combines mean and sem values.

    The first two columns of ``mean_vals`` are carried over unchanged as
    identifier columns. Every remaining column is rendered as the mean,
    the LaTeX plus-minus symbol and the matching value of ``std_vals``,
    both formatted with two decimals and a thousands separator.

    Parameters
    ----------
    mean_vals : pandas.DataFrame
        Table whose first two columns identify the row and whose
        remaining columns hold numeric mean values.
    std_vals : pandas.DataFrame
        Table with the same layout as ``mean_vals`` holding the
        dispersion values. Its first two columns are ignored.

    Returns
    -------
    pandas.DataFrame
        The two identifier columns followed by the formatted score
        columns (strings).
    """
    fmt = '{:,.2f}'.format
    index = mean_vals.iloc[:, :2]
    # Format column by column with Series.map: DataFrame.applymap is
    # deprecated in recent pandas releases, while Series.map is available
    # in every version.
    mean_txt = mean_vals.iloc[:, 2:].apply(lambda col: col.map(fmt))
    std_txt = std_vals.iloc[:, 2:].apply(lambda col: col.map(fmt))
    scores = mean_txt + r" $\pm$ " + std_txt
    return pd.concat([index, scores], axis=1)
def generate_pvalues_tbl(tbl):
    """Format p-values.

    Every column whose dtype equals ``float`` is replaced by its
    scientific-notation string with one decimal (``'%.1e'``). All other
    columns are left untouched.

    Parameters
    ----------
    tbl : pandas.DataFrame
        Table containing the p-values. It is modified in place.

    Returns
    -------
    pandas.DataFrame
        The same ``tbl`` object, with its float columns turned into
        strings.
    """
    float_cols = tbl.dtypes[tbl.dtypes == float].index
    for name in float_cols:
        tbl[name] = tbl[name].apply(lambda pvalue: '%.1e' % pvalue)
    return tbl
def sort_tbl(
    tbl,
    ds_order=None,
    ovrs_order=None,
    clfs_order=None,
    metrics_order=None
):
    """
    Sort tables rows and columns. Mostly used to format results from
    oversampling experiments.

    The columns named 'Dataset', 'Oversampler', 'Classifier' and 'Metric'
    (whichever are present) are converted to categoricals and the rows
    are sorted by them, in the order these columns appear in ``tbl``.
    If every name in ``ovrs_order`` is itself a column of ``tbl``, the
    result is restricted to the key columns followed by those columns,
    in the given order.

    Parameters
    ----------
    tbl : pandas.DataFrame
        Table to sort. It is not modified; a sorted copy is returned.
    ds_order, ovrs_order, clfs_order, metrics_order : list-like or None
        Category order for the 'Dataset', 'Oversampler', 'Classifier'
        and 'Metric' columns respectively. ``None`` lets pandas infer
        the categories from the data. NOTE: per pandas.Categorical,
        values missing from an explicit order become NaN.

    Returns
    -------
    pandas.DataFrame
        The sorted (and possibly column-restricted) table.
    """
    keys = ['Dataset', 'Oversampler', 'Classifier', 'Metric']
    orders = (ds_order, ovrs_order, clfs_order, metrics_order)
    cols = tbl.columns

    # Work on a copy: overwriting the key columns and sorting in place
    # would silently alter the caller's frame.
    tbl = tbl.copy()
    for key, cat in zip(keys, orders):
        if key in cols:
            tbl[key] = pd.Categorical(tbl[key], categories=cat)

    key_cols = [col for col in cols if col in keys]
    # Nothing to sort by when the table has none of the key columns.
    if key_cols:
        tbl = tbl.sort_values(key_cols)

    # Wide format: oversampler names are columns, so reorder them too.
    if ovrs_order is not None and set(ovrs_order).issubset(cols):
        tbl = tbl[key_cols + list(ovrs_order)]
    return tbl
def generate_paths(filepath):
    """
    Generate data, results and analysis paths.

    The three directories are expressed relative to the parent of the
    directory that contains ``filepath``, i.e.
    ``<dir of filepath>/../<name>``. The ``..`` component is kept as is
    (the paths are not normalised) and nothing is created on disk.

    Parameters
    ----------
    filepath : str
        Path of a file, typically the calling script.

    Returns
    -------
    list of str
        The 'data', 'results' and 'analysis' paths, in that order.
    """
    prefix_path = join(dirname(abspath(filepath)), '..')
    return [
        join(prefix_path, name)
        for name in ('data', 'results', 'analysis')
    ]
def make_bold(
    row, maximum=True, num_decimals=2, threshold=None, with_sem=False
):
    """
    Make bold the lowest or highest value(s).

    Values are rounded to ``num_decimals`` before they are compared, so
    entries that are equal after rounding are all highlighted.

    Parameters
    ----------
    row : pandas.Series
        Numeric values to format.
    maximum : bool
        Without ``threshold``: highlight the largest value(s) if True,
        the smallest otherwise. With ``threshold``: highlight values
        strictly above it if True, strictly below it otherwise.
    num_decimals : int
        Number of decimals used for rounding and formatting.
    threshold : float or None
        Optional cut-off that replaces the max/min comparison.
    with_sem : bool
        If True the highlighted cells are left as an unterminated
        ``\\textbf{`` command (no closing brace), so that the caller can
        append more text before closing it, and the highlight mask is
        returned as well.

    Returns
    -------
    pandas.Series or tuple of pandas.Series
        The formatted strings; when ``with_sem`` is True, a tuple of
        the formatted strings and the boolean highlight mask.
    """
    row = row.round(num_decimals)
    if threshold is None:
        val = row.max() if maximum else row.min()
        mask = (row == val)
    else:
        mask = (row > threshold) if maximum else (row < threshold)

    formatter = '{0:.%sf}' % num_decimals
    template = '\\textbf{%s' if with_sem else '\\textbf{%s}'
    # Format each value once and wrap only the highlighted ones, instead
    # of converting already formatted strings back to float.
    cells = [
        template % text if highlight else text
        for text, highlight in zip(map(formatter.format, row), mask)
    ]
    formatted = pd.Series(
        cells, index=row.index, name=row.name, dtype=object
    )

    # Return mask only if function is being used to generate
    # a table with sem values
    if with_sem:
        return formatted, mask
    return formatted
def generate_mean_std_tbl_bold(
    mean_vals, std_vals, maximum=True, decimals=2, threshold=None
):
    """
    Generate table that combines mean and sem values.

    Each cell is rendered as the mean, the LaTeX plus-minus symbol and
    the dispersion value. Per row, the cells selected by ``make_bold``
    are wrapped as a whole in a ``\\textbf{...}`` command.

    Parameters
    ----------
    mean_vals : pandas.DataFrame
        Numeric mean values; highlighting is decided row by row.
    std_vals : pandas.DataFrame
        Dispersion values, matched to ``mean_vals`` by position.
    maximum : bool
        Passed to ``make_bold``.
    decimals : int
        Number of decimals used for both the mean and the dispersion.
    threshold : float or None
        Passed to ``make_bold``.

    Returns
    -------
    pandas.DataFrame
        Table of formatted strings with the index and columns of
        ``mean_vals``.
    """
    # One make_bold call per row yields both the formatted cells and the
    # highlight mask.
    bold_rows = [
        make_bold(row, maximum, decimals, threshold, with_sem=True)
        for _, row in mean_vals.iterrows()
    ]
    mean_bold = pd.DataFrame(
        [cells.to_numpy() for cells, _ in bold_rows],
        index=mean_vals.index,
        columns=mean_vals.columns,
    )
    mask = np.array(
        [flags.to_numpy() for _, flags in bold_rows], dtype=bool
    ).reshape(mean_vals.shape)

    # Use the same fixed-decimal format as the means; a plain str()
    # conversion would drop trailing zeros (e.g. "0.1" instead of "0.10").
    formatter = '{0:.%sf}' % decimals
    std_text = std_vals.apply(lambda col: col.map(formatter.format))
    # Close the \textbf{ command that make_bold left open.
    std_bold = np.where(mask, std_text + '}', std_text)

    scores = mean_bold + r" $\pm$ " + std_bold
    return scores