# Fantastic features and where to find them (code)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
from sklearn import datasets
# Select seaborn's "whitegrid" style; set once here, before any plots are drawn.
sns.set_style("whitegrid")
from IPython.core.display import display, HTML
# Inject a CSS rule that forces `.container` elements to 90% width
# (presumably to widen the notebook page so the large figures fit -- confirm
# against the front-end in use).
display(HTML("<style>.container { width:90% !important; }</style>"))
# Compare how feature importance is distributed across several scikit-learn
# toy datasets: fit a default random forest on each one, sort its feature
# importances in descending order, then plot the sorted importances and their
# cumulative sums (one line per dataset).

# (row label, loader name in sklearn.datasets, task type: "reg" or "cls").
# Loaders are looked up by name so that a loader missing from the installed
# scikit-learn version can be skipped instead of crashing the whole script.
dataset_specs = [
    ("boston", "load_boston", "reg"),
    ("cancer", "load_breast_cancer", "cls"),
    ("diabetes", "load_diabetes", "reg"),
    ("digits", "load_digits", "cls"),
    ("iris", "load_iris", "cls"),
    ("linnerud", "load_linnerud", "reg"),
    ("wine", "load_wine", "cls"),
]

data = []  # (loader callable, task type) pairs, as in the original script
dataset_names = []  # row labels, kept aligned with `data`
for name, loader_name, target in dataset_specs:
    try:
        loader = getattr(datasets, loader_name)
    except (AttributeError, ImportError):
        # `load_boston` was removed in scikit-learn 1.2; accessing it there
        # raises ImportError (not AttributeError), so catch both.
        print(
            f"Skipping {name!r}: sklearn.datasets.{loader_name} "
            "is not available in this scikit-learn version"
        )
        continue
    data.append((loader, target))
    dataset_names.append(name)

importances = []
for f, target in data:
    X, y = f(return_X_y=True)
    model = RandomForestRegressor() if target == "reg" else RandomForestClassifier()
    model.fit(X, y)
    # Sort descending so curves from datasets with different feature sets are
    # comparable by rank rather than by feature position.
    feat_nb = np.sort(model.feature_importances_)[::-1]
    importances.append(feat_nb)

# One row per dataset; datasets have different feature counts, so shorter
# rows are padded with NaN by the DataFrame constructor.
importances_df = pd.DataFrame(importances, index=dataset_names)

# Sorted importances by rank, one line per dataset.
importances_df.T.plot(figsize=(30, 14))
# Cumulative importance by rank, one line per dataset.
importances_df.cumsum(axis=1).T.plot(figsize=(30, 14))