Machine Learning & NLTK Analyses: Random Forest Regressor: Effect of parameters

Random Forest Regressor

The Random Forest Regressor technique is tested here to see how each of its parameters affects its output.

Python program:

>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from matplotlib.colors import ListedColormap
>>> from sklearn import ensemble, datasets
>>> iris = datasets.load_iris()
>>> x = iris.data[:, :2]
>>> y = iris.target
>>> h = .02
>>> cmap_bold = ListedColormap(['firebrick', 'lawngreen', 'b'])
>>> cmap_light = ListedColormap(['pink', 'palegreen', 'lightcyan'])

//Plotting the analysis//

a) Effect of maximum depth (max_depth):

>>> for max_depth in [5, 25, 50, 125, 200, 500, 1250, 4000, 7800, 12500, None]:

... clf = ensemble.RandomForestRegressor(max_depth=max_depth)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (max_depth='%s')" %(max_depth))

...

b) Effect of maximum number of leaf nodes (max_leaf_nodes):

>>> for max_leaf_nodes in [5, 25, 50, 125, 200, 500, 1250, 4000, 7800, 12500, None]:

... clf = ensemble.RandomForestRegressor(max_leaf_nodes=max_leaf_nodes)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (max_leaf_nodes='%s')" %(max_leaf_nodes))

...

c) Effect of minimum samples per leaf (min_samples_leaf):

>>> for min_samples_leaf in [1, 2, 3, 4, 5, 6, 7 , 8, 9, 20, 50]:

... clf = ensemble.RandomForestRegressor(min_samples_leaf=min_samples_leaf)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (min_samples_leaf='%s')" %(min_samples_leaf))

...

d) Effect of minimum sample split (min_samples_split):

>>> for min_samples_leaf in [2, 4, 8, 12, 20, 50]:

... clf = ensemble.RandomForestRegressor(min_samples_split=min_samples_split)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (min_samples_split='%s')" %(min_samples_split))

...

e) Effect of number of estimators (n_estimators):

>>> for n_estimators in [2, 5, 25, 50, 125, 200, 500, 1250, 4000, 7800, 12500]:

... clf = ensemble.RandomForestRegressor(n_estimators=n_estimators)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (n_estimators='%s')" %(n_estimators))

...

f) Effect of out-of-bag scoring (oob_score):

>>> for n_jobs in [2, 5, 25, 50, 125, 200, 500, 1250, 4000, 7800, 12500]:

... clf = ensemble.RandomForestRegressor(oob_score=oob_score)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (oob_score='%s')" %(oob_score))

...

g) Effect of number of parallel jobs (n_jobs):

>>> for n_jobs in [2, 5, 25, 50, 125, 200, 500, 1250, 4000, 7800, 12500]:

... clf = ensemble.RandomForestRegressor(n_jobs=n_jobs)

... clf.fit(x, y)

... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1

... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1

... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))

... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])

... z = z.reshape(xx.shape)

... plt.figure()

... plt.pcolormesh(xx, yy, z, cmap=cmap_light)

... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)

... plt.xlim(xx.min(), xx.max())

... plt.ylim(yy.min(), yy.max())

... plt.title("RandomForestRegressor (n_jobs='%s')" %(n_jobs))

...

Machine Learning & NLTK Analyses

Tuesday, 19 September 2017

Random Forest Regressor: Effect of parameters

No comments:

Post a Comment