Source code for research.utils._check_pipelines
from itertools import product
from sklearn.base import clone
from sklearn.preprocessing import FunctionTransformer
from sklearn.model_selection import ParameterGrid
from imblearn.pipeline import Pipeline
from rlearn.utils import check_random_states
[docs]def check_pipelines(objects_list, random_state, n_runs):
"""Extract estimators and parameters grids."""
# Create random states
random_states = check_random_states(random_state, n_runs)
pipelines = []
param_grid = []
for comb, rs in product(product(*objects_list), random_states):
name = "|".join([i[0] for i in comb])
# name, object, sub grid
comb = [
(nm, ob, ParameterGrid(sg))
if ob is not None
else (nm, FunctionTransformer(), ParameterGrid(sg))
for nm, ob, sg in comb
]
# Create estimator
if name not in [n[0] for n in pipelines]:
est = Pipeline([(nm, ob) for nm, ob, _ in comb])
pipelines.append((name, est))
# Create intermediate parameter grids
sub_grids = [
[{f"{nm}__{k}": v for k, v in param_def.items()} for param_def in sg]
for nm, obj, sg in comb
]
# Create parameter grids
for sub_grid in product(*sub_grids):
param_prefix = "" if len(comb) == 1 else f"{name}__"
grid = {"est_name": [name]}
grid.update(
{f"{param_prefix}{k}": [v] for d in sub_grid for k, v in d.items()}
)
random_states = {
f"{param_prefix}{param}": [rs]
for param in est.get_params()
if "random_state" in param
}
grid.update(random_states)
# Avoid multiple runs over pipelines without random state
if grid not in param_grid:
param_grid.append(grid)
return pipelines, param_grid
[docs]def check_pipelines_wrapper(
objects_list, wrapper, random_state, n_runs, wrapped_only=False
):
wrapper_label = wrapper[0]
wrapper_obj = wrapper[1]
wrapper_grid = wrapper[2]
estimators, param_grids = check_pipelines(objects_list, random_state, n_runs)
wrapped_estimators = [
(
f"{wrapper_label}|{name}",
clone(wrapper_obj).set_params(**{"classifier": pipeline}),
)
for name, pipeline in estimators
]
wrapped_param_grids = [
{
"est_name": [f'{wrapper_label}|{d["est_name"][0]}'],
**{
f'{wrapper_label}|{d["est_name"][0]}__classifier__{k}': v
for k, v in d.items()
if k != "est_name"
},
**{
f'{wrapper_label}|{d["est_name"][0]}__{k}': v
for k, v in wrapper_grid.items()
},
}
for d in param_grids
]
if wrapped_only:
return wrapped_estimators, wrapped_param_grids
else:
return (estimators + wrapped_estimators, param_grids + wrapped_param_grids)