Tuesday, 19 September 2017

Gradient Boosting Classifier: Effect of parameters

The Gradient Boosting Classifier is tested here on the iris dataset to see how its main parameters affect the predicted decision regions and the resulting accuracy.


Python program:


>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from matplotlib.colors import ListedColormap
>>> from sklearn import datasets, ensemble
>>> iris = datasets.load_iris()
>>> x = iris.data[:, :2]   # keep only the first two features so the decision boundary can be plotted
>>> y = iris.target
>>> h = .02                # step size of the mesh used for the boundary plots
>>> cmap_bold = ListedColormap(['firebrick', 'lime', 'blue'])             # training points
>>> cmap_light = ListedColormap(['pink', 'lightgreen', 'paleturquoise'])  # decision regions

Plotting the analysis:
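Each experiment below repeats the same plotting steps, so they could be factored into a small helper first. This is just a sketch; the name plot_boundary is ours, not part of the original listing:

>>> def plot_boundary(clf, title):
...     # build a mesh over the two feature axes, with step h
...     x_min, x_max = x[:, 0].min() - 1, x[:, 0].max() + 1
...     y_min, y_max = x[:, 1].min() - 1, x[:, 1].max() + 1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     # the predicted class at every mesh point gives the decision regions
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title(title)
...

The loops below are kept in their original expanded form; each loop body is equivalent to fitting a classifier and calling plot_boundary(clf, ...). Depending on the matplotlib backend, a final plt.show() may be needed for the figures to appear.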

a) Maximum depth (max_depth):

>>> for max_depth in [1, 2, 5, 25, 125, 500, 1250]:
...     clf = ensemble.GradientBoostingClassifier(max_depth=max_depth)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (max_depth='%s')" %(max_depth))
...

Output:

[Seven decision-boundary plots, one per max_depth value]

The decision regions fit the training points more closely as depth increases, though very deep trees risk overfitting.
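The visual impression can be checked numerically. Here is a minimal sketch (ours, not from the original post) that holds out a test split and scores each depth; a deep tree can fit the training points perfectly while the held-out score stops improving:

>>> from sklearn.model_selection import train_test_split
>>> x_tr, x_te, y_tr, y_te = train_test_split(x, y, random_state=0)
>>> for max_depth in [1, 2, 5, 25, 125, 500, 1250]:
...     clf = ensemble.GradientBoostingClassifier(max_depth=max_depth)
...     clf.fit(x_tr, y_tr)
...     # training accuracy vs. held-out accuracy for this depth
...     print(max_depth, clf.score(x_tr, y_tr), clf.score(x_te, y_te))
...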

b) Maximum leaf nodes (max_leaf_nodes):

>>> for max_leaf_nodes in [2, 5, 25, 125, 500, 1250]:
...     clf = ensemble.GradientBoostingClassifier(max_leaf_nodes=max_leaf_nodes)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (max_leaf_nodes='%s')" %(max_leaf_nodes))
...
Output:

[Six decision-boundary plots, one per max_leaf_nodes value]

Output accuracy improves as the maximum number of leaf nodes (max_leaf_nodes) increases.
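As with max_depth, the claim can be quantified. A brief sketch (ours) using 5-fold cross-validation:

>>> from sklearn.model_selection import cross_val_score
>>> for max_leaf_nodes in [2, 5, 25, 125, 500, 1250]:
...     clf = ensemble.GradientBoostingClassifier(max_leaf_nodes=max_leaf_nodes)
...     # mean accuracy over 5 folds for this leaf-node limit
...     print(max_leaf_nodes, cross_val_score(clf, x, y, cv=5).mean())
...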

c) Learning rate (learning_rate), large values:
>>> for learning_rate in [2, 5, 25, 125, 500, 1250]:
...     clf = ensemble.GradientBoostingClassifier(learning_rate=learning_rate)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (learning_rate='%s')" %(learning_rate))
...
Output:

[Six decision-boundary plots, one per learning_rate value]

A lower learning rate gives better output accuracy. Each boosting stage adds learning_rate times the new tree's contribution, so the large rates tested here (2 to 1250) overshoot badly.

d) Learning rate (learning_rate), typical values:
>>> for learning_rate in [0.01, 0.05, 0.1, 0.2, 0.5, 0.75, 1, 1.5]:
...     clf = ensemble.GradientBoostingClassifier(learning_rate=learning_rate)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (learning_rate='%s')" %(learning_rate))
...
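Before looking at the plots, note that learning rate trades off against the number of boosting stages: a smaller rate usually needs more trees to reach the same fit. A sketch (ours) that uses staged_predict to score the ensemble after every stage, reusing the train/test split from the max_depth check above:

>>> for learning_rate in [0.05, 0.5]:
...     clf = ensemble.GradientBoostingClassifier(learning_rate=learning_rate)
...     clf.fit(x_tr, y_tr)
...     # held-out accuracy after each of the (default 100) boosting stages
...     scores = [np.mean(z == y_te) for z in clf.staged_predict(x_te)]
...     print(learning_rate, max(scores), int(np.argmax(scores)) + 1)
...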
Output:

[Eight decision-boundary plots, one per learning_rate value]
e) Minimum samples in leaf (min_samples_leaf):

>>> for min_samples_leaf in [1, 2, 5, 25, 125, 500, 1000]:
...     clf = ensemble.GradientBoostingClassifier(min_samples_leaf=min_samples_leaf)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (min_samples_leaf='%s')" %(min_samples_leaf))
...
Output:

[Seven decision-boundary plots, one per min_samples_leaf value]

Increasing the minimum number of samples per leaf lowers output accuracy. With only 150 iris samples, values of 125 and above make any split impossible, so those trees never grow past the root.
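In recent scikit-learn versions, min_samples_leaf also accepts a float in (0, 0.5], interpreted as a fraction of the training samples, which scales with dataset size. A brief sketch (ours), again scored with cross_val_score:

>>> for min_samples_leaf in [0.01, 0.05, 0.1, 0.25, 0.5]:
...     # a float is read as ceil(fraction * n_samples) samples per leaf
...     clf = ensemble.GradientBoostingClassifier(min_samples_leaf=min_samples_leaf)
...     print(min_samples_leaf, cross_val_score(clf, x, y, cv=5).mean())
...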

f) Warm start (warm_start):

>>> for warm_start in [True, False]:
...     clf = ensemble.GradientBoostingClassifier(warm_start=warm_start)
...     clf.fit(x, y)
...     x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
...     y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
...     xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
...     z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
...     z = z.reshape(xx.shape)
...     plt.figure()
...     plt.pcolormesh(xx, yy, z, cmap=cmap_light)
...     plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
...     plt.xlim(xx.min(), xx.max())
...     plt.ylim(yy.min(), yy.max())
...     plt.title("GradientBoostingClassifier (warm_start='%s')" %(warm_start))
...
Output:
No effect on the plots: each loop iteration fits a fresh estimator, and warm_start only changes behavior when fit is called again on the same estimator with a larger n_estimators.
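What warm_start is actually for is incremental fitting: with warm_start=True, raising n_estimators and calling fit again grows the existing ensemble instead of retraining from scratch. A minimal sketch:

>>> clf = ensemble.GradientBoostingClassifier(n_estimators=50, warm_start=True)
>>> clf = clf.fit(x, y)                     # fit the first 50 boosting stages
>>> clf = clf.set_params(n_estimators=100)  # request 50 additional stages
>>> clf = clf.fit(x, y)                     # reuses the first 50 stages, trains only the new ones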
