Skip to content

Commit

Permalink
Merge pull request #257 from BCG-Gamma/dev/2.2.1
Browse files Browse the repository at this point in the history
BUILD: Release sklearndf 2.2.1
  • Loading branch information
j-ittner authored May 26, 2023
2 parents 020183a + 61e8ca7 commit 8335d8c
Show file tree
Hide file tree
Showing 9 changed files with 173 additions and 55 deletions.
9 changes: 9 additions & 0 deletions RELEASE_NOTES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,21 @@ Release Notes
.. |nbsp| unicode:: 0xA0
:trim:


*sklearndf* 2.2
---------------

*sklearndf* 2.2 adds support for
`scikit-learn 1.2 <https://scikit-learn.org/1.2>`_, and enhances the EstimatorDF
API.


2.2.1
~~~~~

- VIZ: use *scikit-learn*'s native HTML representation of estimators, if available


2.2.0
~~~~~

Expand Down Expand Up @@ -48,6 +56,7 @@ support of sparse output (see below).
*sklearndf* 2.1 adds support for
`scikit-learn 1.1 <https://scikit-learn.org/1.1>`_.


2.1.1
~~~~~

Expand Down
34 changes: 29 additions & 5 deletions azure-pipelines.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ schedules:
displayName: Nightly full build
branches:
include:
- 2.1.x
- 2.2.x

resources:
repositories:
Expand Down Expand Up @@ -243,7 +243,15 @@ stages:
- script: dir $(Build.SourcesDirectory)

- script: |
conda install -y -c anaconda conda-build~=3.21 conda-verify~=3.4 toml~=0.10 flit~=3.6 packaging~=20.9
# install micromamba
curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
export MAMBA_ROOT_PREFIX=~/micromamba
eval "$(./bin/micromamba shell hook -s posix)"
# create and activate a build environment, then install the tools we need
micromamba create -n build
micromamba activate build
micromamba install -y -c conda-forge boa~=0.14 toml~=0.10 flit~=3.6 packaging~=20.9
displayName: 'Install conda-build, flit, toml'
condition: eq(variables['BUILD_SYSTEM'], 'conda')
Expand All @@ -261,7 +269,11 @@ stages:
targetType: 'inline'
script: |
set -eux
if [ "$BUILD_SYSTEM" = "conda" ] ; then eval "$(conda shell.bash hook)" ; fi
if [ "$BUILD_SYSTEM" = "conda" ] ; then
export MAMBA_ROOT_PREFIX=~/micromamba
eval "$(./bin/micromamba shell hook -s posix)"
micromamba activate build
fi
export RUN_PACKAGE_VERSION_TEST=$(project_name)
cd $(Build.SourcesDirectory)/$(project_root)
Expand Down Expand Up @@ -330,7 +342,15 @@ stages:
- script: dir $(Build.SourcesDirectory)

- script: |
conda install -y -c anaconda conda-build~=3.21 conda-verify~=3.4 toml~=0.10 flit~=3.6 packaging~=20.9
# install micromamba
curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj bin/micromamba
export MAMBA_ROOT_PREFIX=~/micromamba
eval "$(./bin/micromamba shell hook -s posix)"
# create and activate a build environment, then install the tools we need
micromamba create -n build
micromamba activate build
micromamba install -y -c conda-forge boa~=0.14 toml~=0.10 flit~=3.6 packaging~=20.9
displayName: 'Install conda-build, flit, toml'
condition: eq(variables['BUILD_SYSTEM'], 'conda')
Expand All @@ -348,7 +368,11 @@ stages:
targetType: 'inline'
script: |
set -eux
if [ "$BUILD_SYSTEM" = "conda" ] ; then eval "$(conda shell.bash hook)" ; fi
if [ "$BUILD_SYSTEM" = "conda" ] ; then
export MAMBA_ROOT_PREFIX=~/micromamba
eval "$(./bin/micromamba shell hook -s posix)"
micromamba activate build
fi
export RUN_PACKAGE_VERSION_TEST=$(project_name)
cd $(Build.SourcesDirectory)/$(project_root)
Expand Down
57 changes: 23 additions & 34 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,42 +4,31 @@ channels:
- bcg_gamma
dependencies:
# run
- gamma-pytools ~= 2.1
- joblib ~= 1.2
- lightgbm ~= 3.3
- matplotlib ~= 3.6
- numpy ~= 1.24
- pandas ~=1.5|~=2.0
- python ~= 3.9
- scikit-learn ~= 1.2.2
- scipy ~= 1.10.0
- xgboost ~= 1.7
- pip ~= 23.0.1
- boruta_py ~= 0.3
- gamma-pytools ~= 2.1
- joblib ~= 1.2
- lightgbm ~= 3.3
- matplotlib ~= 3.7
- numpy ~= 1.24
- pandas ~= 2.0
- pip ~= 23.0
- python ~= 3.9
- scikit-learn ~= 1.2.0
- scipy ~= 1.10
- xgboost ~= 1.7
- pip:
- arfs ~= 1.0.5
- shap ~= 0.41.0
# build/test
- conda-build ~= 3.23.3
- conda-verify ~= 3.1.1
- docutils ~= 0.17.1
- flit ~= 3.8.0
- jinja2 ~= 2.11.3
- markupsafe ~= 2.0.1 # markupsafe 2.1 breaks support for jinja2
- m2r ~= 0.3.1
- pluggy ~= 0.13.1
- pre-commit ~= 2.21.0
- pytest ~= 7.2.2
- arfs ~= 1.1
# test
- pytest ~= 7.2.1
- pytest-cov ~= 2.12.1
- pyyaml ~= 5.4.1
- toml ~= 0.10.2
- tox ~= 3.27.1
- yaml ~= 0.2.5
# sphinx
- nbsphinx ~= 0.8.12
- sphinx ~= 4.5.0
- nbsphinx ~= 0.8.9
- sphinx ~= 4.5.0
- sphinx-autodoc-typehints ~= 1.19.2
- pydata-sphinx-theme ~= 0.8.1
- pydata-sphinx-theme ~= 0.8.1
# notebooks
- jupyterlab ~= 3.5.3
- openpyxl ~= 3.0.10
- seaborn ~= 0.12.2
- ipywidgets ~= 8.0
- jupyterlab ~= 3.5
- openpyxl ~= 3.0
- seaborn ~= 0.12
- tableone ~= 0.7
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ typing_inspect = "~=0.4.0"

[build.matrix.max]
# direct requirements of sklearndf
arfs = "~=1.0.5"
arfs = "~=1.1"
gamma-pytools = "~=2.1"
lightgbm = "~=3.3"
numpy = ">=1.23" # cannot use ~= due to conda bug
Expand Down
2 changes: 1 addition & 1 deletion src/sklearndf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,4 @@
from ._sklearn_version import *
from ._sklearndf import *

__version__ = "2.2.0"
__version__ = "2.2.1"
14 changes: 13 additions & 1 deletion src/sklearndf/_sklearndf.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import inspect
import logging
from abc import ABCMeta, abstractmethod
from typing import Any, Dict, List, Mapping, Optional, TypeVar, Union, cast
from typing import Any, Callable, Dict, List, Mapping, Optional, TypeVar, Union, cast

import numpy.typing as npt
import pandas as pd
Expand Down Expand Up @@ -210,6 +210,18 @@ def _get_n_outputs(self) -> int:
# get the number of outputs this estimator has been fitted to
return len(self._get_outputs() or [])

def _repr_html_(self) -> str:
    """
    Render this estimator as HTML (hook used by Jupyter notebooks).

    Prefers the native scikit-learn HTML diagram representation of the
    underlying estimator when one is available, falling back to the
    expression-based representation otherwise.
    """
    try:
        # class HasExpressionRepr defines a _repr_html_ method; we want to
        # skip that one and call the _repr_html_ method of the superclass
        # (i.e., look up the attribute past HasExpressionRepr in the MRO,
        # which reaches scikit-learn's native implementation if present)
        sklearn_repr_html: Callable[[], str] = super(
            HasExpressionRepr, self
        )._repr_html_
    except AttributeError:
        # no native scikit-learn _repr_html_ further along the MRO;
        # fall back to the next _repr_html_ after the current class
        # (presumably HasExpressionRepr's expression-based rendering —
        # NOTE(review): confirm against the full class hierarchy)
        return super()._repr_html_()
    else:
        # native scikit-learn representation found: use it
        return sklearn_repr_html()

def to_expression(self) -> Expression:
"""[see superclass]"""

Expand Down
12 changes: 12 additions & 0 deletions src/sklearndf/transformation/extra/_extra.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

import logging

import numpy as np
from sklearn.base import BaseEstimator, TransformerMixin

from pytools.api import AllTracker
Expand All @@ -19,6 +20,17 @@
# import boruta classes only if installed
from boruta import BorutaPy

# Apply a hack to address boruta's incompatibility with numpy >= 1.24:
# boruta uses the aliases np.bool, np.int, and np.float, which were
# deprecated in numpy 1.20 and removed in numpy 1.24.
#
# We check whether these attributes are still defined in numpy, and if not,
# we define them as aliases to the corresponding numpy scalar types with a
# trailing underscore (np.bool_, np.int_, np.float_).

for __attr in ["bool", "int", "float"]:
    if not hasattr(np, __attr):
        setattr(np, __attr, getattr(np, f"{__attr}_"))
# remove the loop variable so it does not linger in the module namespace
del __attr

except ImportError:

class BorutaPy( # type: ignore
Expand Down
49 changes: 46 additions & 3 deletions test/test/sklearndf/test_base.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
# inspired by:
# https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/tests/test_base.py

import re
from typing import Any

import numpy as np
import pytest
import scipy.sparse as sp
import sklearn
from numpy.testing import assert_array_equal
from sklearn import clone
from sklearn.base import BaseEstimator, is_classifier
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from sklearn.utils import estimator_html_repr

from pytools.expression import freeze, make_expression
from pytools.expression.atomic import Id

from sklearndf.classification import SVCDF, DecisionTreeClassifierDF
from sklearndf.clustering.wrapper import KMeansBaseWrapperDF
from sklearndf.pipeline import PipelineDF
from sklearndf.transformation import OneHotEncoderDF
from sklearndf.regression import RandomForestRegressorDF
from sklearndf.transformation import OneHotEncoderDF, SimpleImputerDF
from sklearndf.transformation.wrapper import ImputerWrapperDF
from sklearndf.wrapper import (
ClassifierWrapperDF,
Expand Down Expand Up @@ -75,7 +79,7 @@ def test_clone() -> None:
assert encoder.get_params() == new_encoder.get_params()

encoder = OneHotEncoderDF(handle_unknown="ignore", sparse=False)
new_encoder = clone(encoder)
new_encoder = sklearn.clone(encoder)

assert encoder is not new_encoder

Expand Down Expand Up @@ -161,6 +165,45 @@ def test_str() -> None:
str(my_estimator)


def test_html_repr() -> None:
    """
    Verify that the HTML representation of a sklearndf pipeline matches
    scikit-learn's native diagram representation (modulo generated ids).
    """

    # store the original display config so it can be restored afterwards
    display_original = sklearn.get_config()["display"]

    # set the display config to use diagrams
    sklearn.set_config(display="diagram")

    try:
        # a nested pipeline exercises recursive diagram rendering
        pipeline_df = PipelineDF(
            [
                (
                    "preprocess",
                    PipelineDF(
                        [
                            ("impute", SimpleImputerDF()),
                        ]
                    ),
                ),
                ("rf", RandomForestRegressorDF(n_estimators=120)),
            ]
        )

        def _replace_ids(_html: str) -> str:
            # scikit-learn generates new ids on subsequent calls to
            # estimator_html_repr, so we replace them with a placeholder
            return re.sub(
                r'(?<=id-)\d+|(?:(?<=sk-)|(?<=id=")|(?<=for="))\w+(?:-\w+)*',
                "#",
                _html,
            )

        # the DF wrapper's _repr_html_ must delegate to scikit-learn's
        # native representation of the same estimator
        assert _replace_ids(pipeline_df._repr_html_()) == _replace_ids(
            estimator_html_repr(pipeline_df)
        )

    finally:
        # reset the display config to its original value
        # (removed a redundant trailing `pass` statement here)
        sklearn.set_config(display=display_original)


def test_get_params() -> None:
test = DummyEstimator2DF(DummyEstimator3DF(), DummyEstimator3DF())

Expand Down
Loading

0 comments on commit 8335d8c

Please sign in to comment.