Decision Tree Classifier: Effect of parameters on output
The Decision Tree Classifier technique is tested here to see how its parameters affect the accuracy of its output.
Python program:
//Plotting the analysis//
a) Maximum leaf nodes (max_leaf_nodes):
>>> for max_leaf_nodes in [2, 10, 50, 100]:
... clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_leaf_nodes='%s')" %(max_leaf_nodes))
...
b) Maximum depth (max_depth):
>>> for max_depth in [2, 10, 50, 100, 500]:
... clf = tree.DecisionTreeClassifier(max_depth=max_depth)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_depth='%s')" %(max_depth))
...
c) Maximum features (max_features):
>>> for max_features in [1, 2, None]:
... clf = tree.DecisionTreeClassifier(max_features=max_features)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_features='%s')" %(max_features))
...
d) Minimum samples split (min_samples_split):
>>> for min_samples_split in [2, 10, 100, 500]:
... clf = tree.DecisionTreeClassifier(min_samples_split=min_samples_split)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (min_samples_split='%s')" %(min_samples_split))
...
e) Minimum weight fraction per leaf (min_weight_fraction_leaf):
>>> for min_weight_fraction_leaf in [0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.4, 0.5]:
... clf = tree.DecisionTreeClassifier(min_weight_fraction_leaf=min_weight_fraction_leaf)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (min_weight_fraction_leaf='%s')" %(min_weight_fraction_leaf))
...
The Decision Tree Classifier technique is tested here to see how its parameters affect the accuracy of its output.
Python program:
>>> import numpy as np
>>> import matplotlib.pyplot as plt
>>> from matplotlib.colors import ListedColormap
>>> from sklearn import neighbors, datasets
>>> n_neighbors = 24
>>> iris = datasets.load_iris()
>>> x = iris.data[:, :2]
>>> y = iris.target
>>> h = .02
>>> cmap_bold = ListedColormap(['firebrick', 'lime', 'blue'])
>>> cmap_light = ListedColormap(['pink', 'lightgreen', 'paleturquoise'])
//Plotting the analysis//
a) Maximum leaf nodes (max_leaf_nodes):
>>> for max_leaf_nodes in [2, 10, 50, 100]:
... clf = tree.DecisionTreeClassifier(max_leaf_nodes=max_leaf_nodes)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_leaf_nodes='%s')" %(max_leaf_nodes))
...
b) Maximum depth (max_depth):
>>> for max_depth in [2, 10, 50, 100, 500]:
... clf = tree.DecisionTreeClassifier(max_depth=max_depth)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_depth='%s')" %(max_depth))
...
c) Maximum features (max_features):
>>> for max_features in [1, 2, None]:
... clf = tree.DecisionTreeClassifier(max_features=max_features)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (max_features='%s')" %(max_features))
...
d) Minimum samples split (min_samples_split):
>>> for min_samples_split in [2, 10, 100, 500]:
... clf = tree.DecisionTreeClassifier(min_samples_split=min_samples_split)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (min_samples_split='%s')" %(min_samples_split))
...
e) Minimum weight fraction per leaf (min_weight_fraction_leaf):
>>> for min_weight_fraction_leaf in [0.001, 0.005, 0.01, 0.05, 0.1, 0.2, 0.4, 0.5]:
... clf = tree.DecisionTreeClassifier(min_weight_fraction_leaf=min_weight_fraction_leaf)
... clf.fit(x, y)
... x_min, x_max = x[:, 0].min() -1, x[:, 0].max() +1
... y_min, y_max = x[:, 1].min() -1, x[:, 1].max() +1
... xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
... z = clf.predict(np.c_[xx.ravel(), yy.ravel()])
... z = z.reshape(xx.shape)
... plt.figure()
... plt.pcolormesh(xx, yy, z, cmap=cmap_light)
... plt.scatter(x[:, 0], x[:, 1], c=y, cmap=cmap_bold, edgecolor='k', s=24)
... plt.xlim(xx.min(), xx.max())
... plt.ylim(yy.min(), yy.max())
... plt.title("DecisionTreeClassifier (min_weight_fraction_leaf='%s')" %(min_weight_fraction_leaf))
...
No comments:
Post a Comment