Gradient Boosting Regressor - Effect of parameters on output
The Gradient Boosting Regressor technique is tested here to see how its parameters affect the accuracy of its output.
Python program:
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from matplotlib.colors import ListedColormap
>>> from sklearn import ensemble, datasets
>>> iris = datasets.load_iris()
>>> x = iris.data[:, :2]  # use only the first two features so the result can be plotted
>>> y = iris.target
>>> h = 0.02  # step size of the plotting mesh
>>> cmap_bold = ListedColormap(['firebrick', 'lime', 'blue'])
>>> cmap_light = ListedColormap(['pink', 'lightgreen', 'paleturquoise'])
Plotting the analysis:
a) Effect of learning rate (learning_rate):
learning_rate >= 1.0:
>>> for learning_rate in [1.0, 2.0, 15.0, 75.0, 250.0, 1000.0]:
... clf = ensemble.GradientBoostingRegressor(learning_rate=learning_rate)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (learning_rate='%s')" %(learning_rate))
...
It is found that higher learning-rate values (above 50, and especially beyond 250) result in substantially lower accuracy.
In fact, learning_rate values of 1.0 or below give more accurate results.
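To put numbers on this, here is a minimal sketch (assuming the same x, y, and ensemble import as above) that scores each model on a held-out split; the exact scores will vary, and extreme rates may emit overflow warnings, but the collapse at large learning rates should be visible:
>>> from sklearn.model_selection import train_test_split
>>> x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)
>>> for learning_rate in [0.1, 1.0, 50.0, 250.0]:
... clf = ensemble.GradientBoostingRegressor(learning_rate=learning_rate)
... clf.fit(x_train, y_train)
... print(learning_rate, clf.score(x_test, y_test))  # R^2 on held-out data
...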
learning_rate < 1.0:
>>> for learning_rate in [0.10, 0.20, 0.50, 0.75, 0.9]:
... clf = ensemble.GradientBoostingRegressor(learning_rate=learning_rate)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (learning_rate='%s')" %(learning_rate))
...
b) Effect of alpha:
>>> for alpha in [0.10, 0.20, 0.50, 0.75, 0.9]:
... clf = ensemble.GradientBoostingRegressor(alpha=alpha)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (alpha='%s')" %(alpha))
...
Alpha must be in the range 0 to 1, and no significant effect on the output is seen when alpha is changed. This is expected: in scikit-learn, alpha is the quantile of the huber and quantile loss functions, and it is ignored by the default squared-error loss used here.
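For a case where alpha does matter, here is a sketch (same x and y as above) switching to the quantile loss, where alpha is the target quantile:
>>> for alpha in [0.1, 0.5, 0.9]:
... clf = ensemble.GradientBoostingRegressor(loss='quantile', alpha=alpha)
... clf.fit(x, y)
... print(alpha, clf.predict(x[:1]))  # predictions shift with the target quantile
...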
c) Effect of maximum depth (max_depth):
>>> for max_depth in [1, 2, 5, 50, 175, 500, 1000]:  # max_depth must be an integer
... clf = ensemble.GradientBoostingRegressor(max_depth=max_depth)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (max_depth='%s')" %(max_depth))
...
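The decision-surface plots alone make overfitting hard to judge; here is a sketch (reusing the train/test split from the learning-rate example above) that compares train and test scores as the depth grows:
>>> for max_depth in [1, 2, 5, 50]:
... clf = ensemble.GradientBoostingRegressor(max_depth=max_depth)
... clf.fit(x_train, y_train)
... print(max_depth, clf.score(x_train, y_train), clf.score(x_test, y_test))
...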
d) Effect of maximum leaf nodes (max_leaf_nodes):
>>> for max_leaf_nodes in [2, 5, 50, 175, 500, 1000]:
... clf = ensemble.GradientBoostingRegressor(max_leaf_nodes=max_leaf_nodes)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (max_leaf_nodes='%s')" %(max_leaf_nodes))
...
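The effect can also be checked directly on the fitted trees. Here is a sketch (same x and y as above) printing the actual leaf count of the first tree in the ensemble; max_leaf_nodes is an upper bound, so the printed counts may be smaller:
>>> for max_leaf_nodes in [2, 5, 50]:
... clf = ensemble.GradientBoostingRegressor(max_leaf_nodes=max_leaf_nodes)
... clf.fit(x, y)
... print(max_leaf_nodes, clf.estimators_[0, 0].get_n_leaves())
...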
e) Effect of minimum impurity decrease (min_impurity_decrease):
When min_impurity_decrease <= 1:
>>> for min_impurity_decrease in [0.01, 0.1, 0.5, 0.75]:
... clf = ensemble.GradientBoostingRegressor(min_impurity_decrease=min_impurity_decrease)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (min_impurity_decrease='%s')" %(min_impurity_decrease))
...
When min_impurity_decrease >= 1:
>>> for min_impurity_decrease in [1, 2, 5, 50, 175, 500, 1000]:
... clf = ensemble.GradientBoostingRegressor(min_impurity_decrease=min_impurity_decrease)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (min_impurity_decrease='%s')" %(min_impurity_decrease))
...
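One direct way to see this parameter at work is tree size: a large min_impurity_decrease should prune each tree down toward a single node. A sketch (same x and y as above) printing the node count of the first tree:
>>> for min_impurity_decrease in [0.0, 0.5, 50]:
... clf = ensemble.GradientBoostingRegressor(min_impurity_decrease=min_impurity_decrease)
... clf.fit(x, y)
... print(min_impurity_decrease, clf.estimators_[0, 0].tree_.node_count)
...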
f) Effect of number of estimators (n_estimators):
>>> for n_estimators in [1, 2, 5, 25, 125, 500, 1250]:
... clf = ensemble.GradientBoostingRegressor(n_estimators=n_estimators)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (n_estimators='%s')" %(n_estimators))
...
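Rather than refitting one model per value, the staged predictions of a single large model show how the test error evolves with the number of boosting stages; a sketch reusing the train/test split from above:
>>> from sklearn.metrics import mean_squared_error
>>> clf = ensemble.GradientBoostingRegressor(n_estimators=500)
>>> clf.fit(x_train, y_train)
>>> test_mse = [mean_squared_error(y_test, y_pred) for y_pred in clf.staged_predict(x_test)]
>>> plt.figure()
>>> plt.plot(test_mse)
>>> plt.xlabel('boosting stages')
>>> plt.ylabel('test MSE')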
g) Effect of minimum impurity split (min_impurity_split):
Note that min_impurity_split is deprecated in favor of min_impurity_decrease and has been removed in recent scikit-learn releases, so the loop below only runs on older versions.
>>> for min_impurity_split in [1, 2, 5, 25, 125, 500, 1250]:
... clf = ensemble.GradientBoostingRegressor(min_impurity_split=min_impurity_split)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("GradientBoostingRegressor (min_impurity_split='%s')" %(min_impurity_split))
...