This works (mostly from the demo sample at sklearn):
print(__doc__)
# Code source: Ga?l Varoquaux
# Modified for documentation by Jaques Grobler
# License: BSD 3 clause
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model, decomposition, datasets
from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from scipy.stats import uniform
lregress = LinearRegression()
pca = decomposition.PCA()
pipe = Pipeline(steps=[('pca', pca), ('regress', lregress)])
# Plot the PCA spectrum
pca.fit(data_num)
plt.figure(1, figsize=(16, 9))
plt.clf()
plt.axes([.2, .2, .7, .7])
plt.plot(pca.explained_variance_, linewidth=2)
plt.axis('tight')
plt.xlabel('n_components')
plt.ylabel('explained_variance_')
# Prediction
n_components = uniform.rvs(loc=1, scale=data_num.shape[1], size=50,
random_state=42).astype(int)
# Parameters of pipelines can be set using ‘__’ separated parameter names:
estimator_pca = GridSearchCV(pipe,
dict(pca__n_components=n_components)
)
estimator_pca.fit(data_num, data_labels)
plt.axvline(estimator_pca.best_estimator_.named_steps['pca'].n_components,
linestyle=':', label='n_components chosen ' +
str(estimator_pca.best_estimator_.named_steps['pca'].n_components))
plt.legend(prop=dict(size=12))
plt.plot(np.cumsum(pca.explained_variance_ratio_), linewidth=1)
plt.show()
And this works:
from sklearn.feature_selection import RFECV
estimator = LinearRegression()
selector = RFECV(estimator, step=1, cv=5, scoring='explained_variance')
selector = selector.fit(data_num_pd, data_labels)
print("Selected number of features : %d" % selector.n_features_)
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score")
plt.plot(range(1, len(selector.grid_scores_) + 1), selector.grid_scores_)
plt.show()
but this gives me the error "RuntimeError: The classifier does not expose "coef_" or "feature_importances_" attributes" on the line "selector1 = selector1.fit"
pca_est = estimator_pca.best_estimator_
selector1 = RFECV(pca_est, step=1, cv=5, scoring='explained_variance')
selector1 = selector1.fit(data_num_pd, data_labels)
print("Selected number of features : %d" % selector1.n_features_)
plt.figure()
plt.xlabel("Number of features selected")
plt.ylabel("Cross validation score")
plt.plot(range(1, len(selector1.grid_scores_) + 1), selector1.grid_scores_)
plt.show()
How do I get my best-found PCA estimator to be used as the estimator in RFECV?
See Question&Answers more detail:
os 与恶龙缠斗过久,自身亦成为恶龙;凝视深渊过久,深渊将回以凝视…