Tuesday, 19 September 2017

Bagging Regression: Effect of parameters

Bagging Regression

 Bagging Regression technique is tested here to see their accuracy in terms of output.

Python program:


>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from matplotlib.colors import ListedColormap
>>> from sklearn import ensemble, datasets
>>> iris = datasets.load_iris()
>>> x = iris.data[:, :2]
>>> y = iris.target
>>> h = .02
>>> cmap_bold = ListedColormap(['firebrick', 'lawngreen', 'b'])
>>> cmap_light = ListedColormap(['pink', 'palegreen', 'lightcyan'])

//Plotting the analysis//


a) Effect of number of estimators(n_estimators):
>>> for n_estimators in [1, 2, 5, 25, 50, 100, 150, 250, 500, 1250]:
...     clf = ensemble.BaggingRegressor(n_estimators=n_estimators)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (n_estimators='%s')" %(n_estimator))

Output:


Increased number of estimators result in improved output accuracy.

b) Effect of number of jobs (n_jobs):

>>> for n_jobs in [1, 2, 5, 25, 50, 100, 150, 250, 500, 1250]:
...     clf = ensemble.BaggingRegressor(n_jobs=n_jobs)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (n_jobs='%s')" %(n_jobs))

Increased number of jobs(n_jobs) result in improved analyses accuracy.

c) Effect of maximum sample size:

>>> for max_samples in [1, 2, 5, 25, 50, 100, 120, 150]:
...     clf = ensemble.BaggingRegressor(max_samples=max_samples)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (n_estimators='%s')" %(n_estimators))
...     plt.title("Bagging Regressor (max_samples='%s')" %(max_samples))
...

Increased number of max_samples result in increased output accuracy.

d) Effect of Bootstrap on output:

>>> for bootstrap_features in [True, False]:
...     clf = ensemble.BaggingRegressor(bootstrap_features=bootstrap_features)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (bootstrap_features='%s')" %(bootstrap_features))
...
Change in status of bootstrap_features from False to True result in improved accuracy.

e) Effect of out of bound:
>>> for oob_score in [True, False]:
...     clf = ensemble.BaggingRegressor(oob_score=oob_score)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (oob_score='%s')" %(oob_score))
...
Change in status of oob from False to True result in improved accuracy.

f) Effect of number of features:
>>> for max_features in [1, 2]:
...     clf = ensemble.BaggingRegressor(max_features=max_features)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("Bagging Regressor (max_features='%s')" %(max_features))
...

Increase in features result in improved accuracy.

No comments:

Post a Comment