# Source code for slickml.visualization._glmnet

from typing import Any, Dict, Optional, Tuple, Union

import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from matplotlib.figure import Figure

from slickml.utils import check_var

# Module-level plotting defaults. These statements run when the module is
# imported and modify the global seaborn/matplotlib configuration.
sns.set_style("ticks")
# Set the default axes-frame and plotted-line widths to 2.
mpl.rcParams["axes.linewidth"] = 2
mpl.rcParams["lines.linewidth"] = 2


def plot_glmnet_cv_results(
    figsize: Optional[Tuple[Union[int, float], Union[int, float]]] = (8, 5),
    marker: Optional[str] = "o",
    markersize: Optional[Union[int, float]] = 5,
    color: Optional[str] = "red",
    errorbarcolor: Optional[str] = "black",
    maxlambdacolor: Optional[str] = "purple",
    bestlambdacolor: Optional[str] = "navy",
    linestyle: Optional[str] = "--",
    fontsize: Optional[Union[int, float]] = 12,
    grid: Optional[bool] = True,
    legend: Optional[bool] = True,
    legendloc: Optional[Union[int, str]] = "best",
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = None,
    title: Optional[str] = None,
    save_path: Optional[str] = None,
    display_plot: Optional[bool] = True,
    return_fig: Optional[bool] = False,
    **kwargs: Dict[str, Any],
) -> Optional[Figure]:
    """Plots the GLMNet cross-validation scores against ``-log(lambda)``.

    The mean CV score is drawn with error bars along the lambda path, and two
    vertical lines mark ``lambda_max`` and ``lambda_best``.

    Notes
    -----
    This plotting function can be used along with ``results_`` attribute of any of
    ``GLMNetCVClassifier``, or ``GLMNetCVRegressor`` classes as ``kwargs``. The keys read
    directly from ``kwargs`` are ``"lambda_path"``, ``"cv_mean_score"``,
    ``"cv_standard_error"``, ``"lambda_max"``, and ``"lambda_best"``. When ``ylabel`` or
    ``title`` is not given, ``kwargs`` is also forwarded to the module helpers that build
    the default label and title.

    Parameters
    ----------
    figsize : tuple, optional
        Figure size, by default (8, 5)

    marker : str, optional
        Marker style of the metric to distinguish the error bars. More valid marker styles
        can be found at [markers-api]_, by default "o"

    markersize : Union[int, float], optional
        Markersize, by default 5

    color : str, optional
        Line and marker color, by default "red"

    errorbarcolor : str, optional
        Error bar color, by default "black"

    maxlambdacolor : str, optional
        Color of vertical line for ``lambda_max``, by default "purple"

    bestlambdacolor : str, optional
        Color of vertical line for ``lambda_best``, by default "navy"

    linestyle : str, optional
        Linestyle of vertical lambda lines, by default "--"

    fontsize : Union[int, float], optional
        Fontsize of the title. The fontsizes of xlabel, ylabel, tick_params, and legend are
        resized with 0.85, 0.85, 0.75, and 0.85 fraction of title fontsize, respectively,
        by default 12

    grid : bool, optional
        Whether to show (x,y) grid on the plot or not, by default True

    legend : bool, optional
        Whether to show legend on the plot or not, by default True

    legendloc : Union[int, str], optional
        Location of legend, by default "best"

    xlabel : str, optional
        Xlabel of the plot; when falsy the default "-Log(Lambda)" label is used

    ylabel : str, optional
        Ylabel of the plot; when falsy the default "{n_splits}-Folds CV Mean {metric}"
        label is built from ``kwargs``

    title : str, optional
        Title of the plot; when falsy the default "Best {lambda_best} with {n} Features"
        title is built from ``kwargs``

    save_path : str, optional
        The full or relative path to save the plot including the image format such as
        "myplot.png" or "../../myplot.pdf". The figure is saved with ``dpi=200`` and
        ``bbox_inches="tight"``, by default None

    display_plot : bool, optional
        Whether to show the plot, by default True

    return_fig : bool, optional
        Whether to return figure object, by default False

    **kwargs : Dict[str, Any]
        Key-value pairs of results. ``results_`` attribute can be used

    See Also
    --------
    :class:`slickml.classification.GLMNetCVClassifier`
    :class:`slickml.regression.GLMNetCVRegressor`

    References
    ----------
    .. [markers-api] https://matplotlib.org/stable/api/markers_api.html

    Returns
    -------
    Figure, optional
        The created figure when ``return_fig`` is True, otherwise None
    """
    # Styling arguments are validated in signature order.
    for value, name, allowed in (
        (figsize, "figsize", tuple),
        (marker, "marker", str),
        (markersize, "markersize", (float, int)),
        (color, "color", str),
        (errorbarcolor, "errorbarcolor", str),
        (maxlambdacolor, "maxlambdacolor", str),
        (bestlambdacolor, "bestlambdacolor", str),
        (linestyle, "linestyle", str),
        (fontsize, "fontsize", (int, float)),
        (grid, "grid", bool),
        (legend, "legend", bool),
        (legendloc, "legendloc", (int, str)),
    ):
        check_var(
            value,
            var_name=name,
            dtypes=allowed,
        )

    # Labels: a falsy value means "use the default"; anything else is validated.
    if not xlabel:
        xlabel = _xlabel()
    else:
        check_var(
            xlabel,
            var_name="xlabel",
            dtypes=str,
        )
    if not ylabel:
        ylabel = _ylabel(**kwargs)
    else:
        check_var(
            ylabel,
            var_name="ylabel",
            dtypes=str,
        )
    if not title:
        title = _title(**kwargs)
    else:
        check_var(
            title,
            var_name="title",
            dtypes=str,
        )

    for flag, flag_name in (
        (display_plot, "display_plot"),
        (return_fig, "return_fig"),
    ):
        check_var(
            flag,
            var_name=flag_name,
            dtypes=bool,
        )
    if save_path:
        check_var(
            save_path,
            var_name="save_path",
            dtypes=str,
        )

    # TODO(amir): standardize the fontsize
    fig, ax = plt.subplots(figsize=figsize)

    # Mean CV score with error bars along the (negative log) lambda path.
    ax.errorbar(
        -np.log(kwargs["lambda_path"]),
        kwargs["cv_mean_score"],
        yerr=kwargs["cv_standard_error"],
        color=color,
        ecolor=errorbarcolor,
        marker=marker,
        markersize=markersize,
    )

    # Both vertical markers and the y-limits share the same padded score range.
    max_lambda_x = -np.log(kwargs["lambda_max"])
    mean_scores = kwargs["cv_mean_score"]
    y_low = min(mean_scores) - 0.05  # type: ignore
    y_high = max(mean_scores) + 0.05  # type: ignore
    ax.vlines(
        max_lambda_x,
        ymin=y_low,
        ymax=y_high,
        linestyles=linestyle,
        color=maxlambdacolor,
        label=r"max $\lambda$",
    )
    ax.vlines(
        -np.log(kwargs["lambda_best"]),
        ymin=y_low,
        ymax=y_high,
        linestyles=linestyle,
        color=bestlambdacolor,
        label=r"best $\lambda$",
    )
    ax.set_ylim([y_low, y_high])

    # Axis labels and legend are scaled relative to the title fontsize.
    secondary_size = fontsize * 0.85  # type: ignore
    ax.set_xlabel(xlabel, fontsize=secondary_size)
    ax.set_ylabel(ylabel, fontsize=secondary_size)
    ax.set_title(title, fontsize=fontsize)
    ax.tick_params(
        axis="both",
        which="major",
        labelsize=fontsize * 0.75,  # type: ignore
    )
    ax.grid(grid)

    if legend:
        ax.legend(
            loc=legendloc,
            prop={"size": fontsize * 0.85},  # type: ignore
            framealpha=0.0,
        )

    if save_path:
        plt.savefig(
            save_path,
            bbox_inches="tight",
            dpi=200,
        )

    if display_plot:
        plt.show()

    return fig if return_fig else None
def plot_glmnet_coeff_path(
    figsize: Optional[Tuple[Union[int, float], Union[int, float]]] = (8, 5),
    linestyle: Optional[str] = "-",
    fontsize: Optional[Union[int, float]] = 12,
    grid: Optional[bool] = True,
    legend: Optional[bool] = True,
    legendloc: Optional[Union[int, str]] = "center",
    xlabel: Optional[str] = None,
    ylabel: Optional[str] = "Coefficients",
    title: Optional[str] = None,
    bbox_to_anchor: Tuple[float, float] = (1.1, 0.5),
    yscale: Optional[str] = "linear",
    save_path: Optional[str] = None,
    display_plot: Optional[bool] = True,
    return_fig: Optional[bool] = False,
    **kwargs: Dict[str, Any],
) -> Optional[Figure]:
    """Plots the GLMNet coefficient paths against ``-log(lambda)``.

    One line is drawn for every entry of ``kwargs["coeff_path"]`` whose key is also
    present in ``kwargs["coeff"]``; entries whose key is absent are skipped. The x-values
    come from ``kwargs["lambda_path"]``.

    Parameters
    ----------
    figsize : tuple, optional
        Figure size, by default (8, 5)

    linestyle : str, optional
        Linestyle of paths, by default "-"

    fontsize : Union[int, float], optional
        Fontsize of the title. The fontsizes of xlabel and ylabel are resized with 0.85
        fraction of the title fontsize, while tick_params and legend use 0.75 fraction,
        by default 12

    grid : bool, optional
        Whether to show (x,y) grid on the plot or not, by default True

    legend : bool, optional
        Whether to show legend on the plot or not, by default True

    legendloc : Union[int, str], optional
        Location of legend, by default "center"

    xlabel : str, optional
        Xlabel of the plot; when falsy the default "-Log(Lambda)" label is used

    ylabel : str, optional
        Ylabel of the plot, by default "Coefficients"

    title : str, optional
        Title of the plot; when falsy the default "Best {lambda_best} with {n} Features"
        title is built from ``kwargs``

    bbox_to_anchor : Tuple[float, float], optional
        Relative coordinates for legend location outside of the plot, by default (1.1, 0.5)

    yscale : str, optional
        Scale for y-axis (coefficients). Possible options are ``"linear"``, ``"log"``,
        ``"symlog"``, ``"logit"`` [yscale]_, by default "linear"

    save_path : str, optional
        The full or relative path to save the plot including the image format such as
        "myplot.png" or "../../myplot.pdf". The figure is saved with ``dpi=200`` and
        ``bbox_inches="tight"``, by default None

    display_plot : bool, optional
        Whether to show the plot, by default True

    return_fig : bool, optional
        Whether to return figure object, by default False

    **kwargs : Dict[str, Any]
        Key-value pairs of results. ``results_`` attribute can be used

    References
    ----------
    .. [yscale] https://matplotlib.org/stable/api/_as_gen/matplotlib.pyplot.yscale.html

    Returns
    -------
    Figure, optional
        The created figure when ``return_fig`` is True, otherwise None
    """
    # Styling arguments are validated in signature order.
    for value, name, allowed in (
        (figsize, "figsize", tuple),
        (linestyle, "linestyle", str),
        (fontsize, "fontsize", (int, float)),
        (grid, "grid", bool),
        (legend, "legend", bool),
        (legendloc, "legendloc", (int, str)),
    ):
        check_var(
            value,
            var_name=name,
            dtypes=allowed,
        )

    # A falsy xlabel/title means "use the default"; ylabel is always validated.
    if not xlabel:
        xlabel = _xlabel()
    else:
        check_var(
            xlabel,
            var_name="xlabel",
            dtypes=str,
        )
    check_var(
        ylabel,
        var_name="ylabel",
        dtypes=str,
    )
    if not title:
        title = _title(**kwargs)
    else:
        check_var(
            title,
            var_name="title",
            dtypes=str,
        )

    check_var(
        bbox_to_anchor,
        var_name="bbox_to_anchor",
        dtypes=tuple,
    )
    check_var(
        yscale,
        var_name="yscale",
        dtypes=str,
        values=("linear", "log", "symlog", "logit"),
    )
    for flag, flag_name in (
        (display_plot, "display_plot"),
        (return_fig, "return_fig"),
    ):
        check_var(
            flag,
            var_name=flag_name,
            dtypes=bool,
        )
    if save_path:
        check_var(
            save_path,
            var_name="save_path",
            dtypes=str,
        )

    fig, ax = plt.subplots(figsize=figsize)

    # Draw only the paths of features that also appear in ``kwargs["coeff"]``.
    for feature_name, path_values in kwargs["coeff_path"].items():
        if feature_name not in kwargs["coeff"]:
            continue
        ax.plot(
            -np.log(kwargs["lambda_path"]),
            path_values,
            linestyle=linestyle,
            label=feature_name,
        )

    ax.tick_params(
        axis="both",
        which="major",
        labelsize=fontsize * 0.75,  # type: ignore
    )
    ax.set_ylabel(
        ylabel,
        fontsize=fontsize * 0.85,  # type: ignore
    )
    ax.set_xlabel(
        xlabel,
        fontsize=fontsize * 0.85,  # type: ignore
    )
    ax.set_title(title, fontsize=fontsize)
    ax.set_yscale(yscale)
    ax.grid(grid)

    if legend:
        # The legend is anchored via ``bbox_to_anchor`` and uses the tick-label font scale.
        ax.legend(
            loc=legendloc,
            bbox_to_anchor=bbox_to_anchor,
            ncol=1,
            prop={"size": fontsize * 0.75},  # type: ignore
            framealpha=0.0,
            fancybox=True,
        )

    if save_path:
        plt.savefig(
            save_path,
            bbox_inches="tight",
            dpi=200,
        )

    if display_plot:
        plt.show()

    return fig if return_fig else None
def _xlabel() -> str: """Returns xlabel. Returns ------- str """ return r"-$Log(\lambda)$" def _ylabel(**kwargs: Dict[str, Any]) -> str: """Returns ylabel. Parameters ---------- kwargs : Dict[str, Any] Key-value pairs of results (``results_``) Returns ------- str """ if kwargs["params"]["scoring"] is None: if kwargs["module"] == "glmnet.linear": ylabel = rf"""{kwargs["params"]["n_splits"]}-Folds CV Mean $R^2$""" elif kwargs["module"] == "glmnet.logistic": ylabel = f"""{kwargs["params"]["n_splits"]}-Folds CV Mean ACCURACY""" else: ylabel = f"""{kwargs["params"]["n_splits"]}-Folds CV Mean {' '.join((kwargs["params"]["scoring"]).split("_")).upper()}""" return ylabel def _title(**kwargs: Dict[str, Any]) -> str: """Returns title. Parameters ---------- kwargs : Dict[str, Any] Key-value pairs of results (``results_``) Returns ------- str """ return rf"""Best $\lambda$ = {kwargs["lambda_best"]:.3f} with {len(kwargs["coeff"])} Features"""