import numpy as np
import matplotlib.pyplot as plt
from .plot_helpers import cm2, cm3, discrete_scatter

def _call_classifier_chunked(classifier_pred_or_decide, X):
    # chunk_size is used to split the large arrays so the code also works on
    # x86 memory models that are restricted to < 2 GB per memory allocation.
    # The chunk_size value used here is based on a measurement with the
    # MLPClassifier using the following parameters:
    # MLPClassifier(solver='lbfgs', random_state=0,
    #               hidden_layer_sizes=[1000,1000,1000])
    # Reducing the value trades time for memory.
    # Chunking the array is possible because the per-row calculations are
    # independent of each other.
    # Note: an interim version distinguished between 32- and 64-bit
    # architectures and skipped the chunking on 64-bit. Testing revealed
    # that even on 64-bit architectures the chunking improves performance
    # by a factor of 3-5, largely by avoiding memory swapping.
    chunk_size = 10000

    # We use a list to collect all result chunks
    Y_result_chunks = []

    # Call the classifier in chunks.
    for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
                                               chunk_size, dtype=np.int32),
                                  axis=0):
        Y_result_chunks.append(classifier_pred_or_decide(x_chunk))

    return np.concatenate(Y_result_chunks)


def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
                           alpha=1, cm=cm3):
    # multiclass
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    decision_values = classifier.predict(X_grid)
    ax.imshow(decision_values.reshape(X1.shape), extent=(x_min, x_max,
                                                         y_min, y_max),
              aspect='auto', origin='lower', alpha=alpha, cmap=cm)
    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())


def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
                   function=None):
    # binary with fill
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 100)
    yy = np.linspace(y_min, y_max, 100)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    if function is None:
        function = getattr(classifier, "decision_function",
                           getattr(classifier, "predict_proba"))
    else:
        function = getattr(classifier, function)
    decision_values = function(X_grid)
    if decision_values.ndim > 1 and decision_values.shape[1] > 1:
        # predict_proba
        decision_values = decision_values[:, 1]
    grr = ax.imshow(decision_values.reshape(X1.shape),
                    extent=(x_min, x_max, y_min, y_max), aspect='auto',
                    origin='lower', alpha=alpha, cmap=cm)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())
    return grr


def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
                      cm=cm2, linewidth=None, threshold=None,
                      linestyle="solid"):
    # binary?
    if eps is None:
        eps = X.std() / 2.

    if ax is None:
        ax = plt.gca()

    x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
    y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
    xx = np.linspace(x_min, x_max, 1000)
    yy = np.linspace(y_min, y_max, 1000)

    X1, X2 = np.meshgrid(xx, yy)
    X_grid = np.c_[X1.ravel(), X2.ravel()]
    if hasattr(classifier, "decision_function"):
        decision_values = _call_classifier_chunked(classifier.decision_function,
                                                   X_grid)
        levels = [0] if threshold is None else [threshold]
        fill_levels = [decision_values.min()] + levels + [
            decision_values.max()]
    else:
        # no decision_function
        decision_values = _call_classifier_chunked(classifier.predict_proba,
                                                   X_grid)[:, 1]
        levels = [.5] if threshold is None else [threshold]
        fill_levels = [0] + levels + [1]
    if fill:
        ax.contourf(X1, X2, decision_values.reshape(X1.shape),
                    levels=fill_levels, alpha=alpha, cmap=cm)
    else:
        ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels,
                   colors="black", alpha=alpha, linewidths=linewidth,
                   linestyles=linestyle, zorder=5)

    ax.set_xlim(x_min, x_max)
    ax.set_ylim(y_min, y_max)
    ax.set_xticks(())
    ax.set_yticks(())


if __name__ == '__main__':
    from sklearn.datasets import make_blobs
    from sklearn.linear_model import LogisticRegression
    X, y = make_blobs(centers=2, random_state=42)
    clf = LogisticRegression(solver='lbfgs').fit(X, y)
    plot_2d_separator(clf, X, fill=True)
    discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()
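To illustrate the chunking logic in _call_classifier_chunked, here is a small standalone sketch of my own (not part of the module): np.array_split with the boundary indices from np.arange yields chunks of at most chunk_size rows, and concatenating the per-chunk results matches a single call because each row is computed independently.

import numpy as np

chunk_size = 3                       # tiny value for demonstration; the module uses 10000
X = np.arange(20).reshape(10, 2)     # 10 samples, 2 features
boundaries = np.arange(chunk_size, X.shape[0], chunk_size, dtype=np.int32)
chunks = np.array_split(X, boundaries, axis=0)
print([c.shape[0] for c in chunks])  # [3, 3, 3, 1]

def row_sum(a):                      # stands in for predict/decision_function
    return a.sum(axis=1)

assert np.array_equal(np.concatenate([row_sum(c) for c in chunks]), row_sum(X))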
Example usage: plotting the decision boundary for several numbers of neighbors, compared side by side in one figure:

import mglearn
from sklearn.neighbors import KNeighborsClassifier
# X, y: a small two-class dataset, e.g. X, y = mglearn.datasets.make_forge()

fig, axes = plt.subplots(1, 3, figsize=(10, 3))  # one panel per setting
for n_neighbors, ax in zip([1, 3, 9], axes):
    knn = KNeighborsClassifier(n_neighbors=n_neighbors)
    knn.fit(X, y)
    # draw the decision boundary
    mglearn.plots.plot_2d_separator(knn, X, fill=True, eps=0.5, ax=ax, alpha=.4)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    ax.set_title("{} neighbor(s)".format(n_neighbors))
    ax.set_xlabel("feature 0")
    ax.set_ylabel("feature 1")
axes[0].legend(loc=3)
plot_2d_separator
For a two-dimensional dataset, we can plot the prediction for every possible test point in the xy-plane, coloring the plane according to the class that would be assigned at each point. This lets us view the decision boundary, that is, the dividing line the algorithm learns between class 0 and class 1.
Python Supervised Learning: Neural Networks (Deep Learning)
Deep learning algorithms are often precisely tuned to, and only suitable for, specific application scenarios. Here we discuss the simple multilayer perceptron (MLP) for classification and regression; the MLP is also known as a feed-forward neural network, or sometimes just a neural network.
1. The neural network model
An MLP can be viewed as a generalization of the linear model:
display(mglearn.plots.plot_logistic_regression_graph())
In a linear model, the output is a weighted sum of input features times coefficients. In an MLP this weighted-sum step is repeated: first the intermediate hidden units are computed as weighted sums of the inputs, then the output is computed as a weighted sum of the hidden units.
display(mglearn.plots.plot_single_hidden_layer_graph())
This model has many more coefficients (weights) to learn. To make it more powerful than a linear model, one more trick is needed: after computing the weighted sum for each hidden unit, a nonlinear function is applied to the result, usually the rectifying nonlinearity (also called the rectified linear unit, or relu) or the tangens hyperbolicus (tanh). The result of this function is then used in the weighted sum that computes the output. relu cuts off values below zero, while tanh approaches -1 for small inputs and +1 for large inputs; either nonlinearity is what lets the network learn far more complex functions than a linear model:
line = np.linspace(-3, 3, 100)
plt.plot(line, np.tanh(line), label="tanh")
plt.plot(line, np.maximum(line, 0), label="relu")
plt.legend(loc="best")
plt.xlabel("x")
plt.ylabel("relu(x), tanh(x)")
plt.show()
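To make the computation concrete, here is a minimal sketch of the forward pass of an MLP with one hidden layer and a tanh nonlinearity (my own illustration, not from the book; the names x, W1, b1, W2, b2 are hypothetical, and in practice all weights are learned from data):

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=4)           # 4 input features
W1 = rng.normal(size=(4, 3))     # input -> hidden weights (3 hidden units)
b1 = rng.normal(size=3)
W2 = rng.normal(size=(3, 1))     # hidden -> output weights
b2 = rng.normal(size=1)

h = np.tanh(x @ W1 + b1)         # weighted sum, then nonlinearity
y_hat = h @ W2 + b2              # final weighted sum gives the output
print(h, y_hat)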
All the weights are learned from data; what we need to set is the number of hidden nodes, which can be as small as 10 or as large as 10,000. It is also possible to add more hidden layers:
mglearn.plots.plot_two_hidden_layer_graph()
2. Tuning neural networks
We use MLPClassifier:
from sklearn.neural_network import MLPClassifier
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split

X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y,
                                                    random_state=42)
mlp = MLPClassifier(solver='lbfgs', random_state=0).fit(X_train, y_train)
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
This decision boundary is nonlinear but relatively smooth; note that we used solver='lbfgs'. By default, the MLP uses 100 hidden nodes, which is quite a lot for this small dataset. We can reduce the number and still get a good result:
mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10])
mlp.fit(X_train, y_train)
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
With fewer units, the decision boundary is more ragged. The default nonlinearity is relu; with a single hidden layer of 10 units, the decision function is made up of 10 straight line segments. To get a smoother boundary, we can add a second hidden layer:
mlp = MLPClassifier(solver='lbfgs', random_state=0,
                    hidden_layer_sizes=[10, 10])
mlp.fit(X_train, y_train)
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
Or we can use the tanh nonlinearity:
mlp = MLPClassifier(solver='lbfgs', activation='tanh', random_state=0,
                    hidden_layer_sizes=[10, 10])
mlp.fit(X_train, y_train)
mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
plt.xlabel("Feature 0")
plt.ylabel("Feature 1")
plt.show()
Finally, we can also use an L2 penalty to shrink the weights toward zero, just as we did in ridge regression and the linear classifiers. The parameter that controls the L2 penalty in MLPClassifier is alpha (as in the linear regression models), and its default value is very small (weak regularization):
fig, axes = plt.subplots(2, 4, figsize=(20, 8))
for axx, n_hidden_nodes in zip(axes, [10, 100]):
    for ax, alpha in zip(axx, [0.0001, 0.01, 0.1, 1]):
        mlp = MLPClassifier(solver='lbfgs', random_state=0,
                            hidden_layer_sizes=[n_hidden_nodes, n_hidden_nodes],
                            alpha=alpha)
        mlp.fit(X_train, y_train)
        mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
        mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
        ax.set_title("n_hidden=[{}, {}]\nalpha={:.4f}".format(
            n_hidden_nodes, n_hidden_nodes, alpha))
plt.show()
The ways to control the complexity of a neural network are the number of hidden layers, the number of units in each hidden layer, and regularization (alpha). The weights are set randomly before learning starts, so the random seed also affects the learned model:
fig, axes = plt.subplots(2, 4, figsize=(20, 8))
for i, ax in enumerate(axes.ravel()):
    mlp = MLPClassifier(solver='lbfgs', random_state=i,
                        hidden_layer_sizes=[100, 100])
    mlp.fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
plt.show()
Let's continue with a different dataset, the Breast Cancer dataset:
from sklearn.datasets import load_breast_cancer

cancer = load_breast_cancer()
print("Cancer data per-feature maxima:\n{}".format(cancer.data.max(axis=0)))
X_train, X_test, y_train, y_test = train_test_split(
    cancer.data, cancer.target, random_state=0)
mlp = MLPClassifier(random_state=42)
mlp.fit(X_train, y_train)
print("Accuracy on training set: {:.2f}".format(mlp.score(X_train, y_train)))
print("Accuracy on test set: {:.2f}".format(mlp.score(X_test, y_test)))
The accuracy of the MLP is quite good, but not as good as the other models. As in the earlier SVC example, this is likely due to the scaling of the data: a neural network also expects all input features to vary in a similar way, ideally with a mean of 0 and a variance of 1. Here we rescale by hand; later we will use StandardScaler to do this:
# compute the mean value per feature on the training set
mean_on_train = X_train.mean(axis=0)
# compute the standard deviation of each feature on the training set
std_on_train = X_train.std(axis=0)
# subtract the mean, then scale by the inverse standard deviation
# afterward, mean=0 and std=1
X_train_scaled = (X_train - mean_on_train) / std_on_train
# use THE SAME transformation (training mean and std) on the test set
X_test_scaled = (X_test - mean_on_train) / std_on_train
mlp = MLPClassifier(random_state=0)
mlp.fit(X_train_scaled, y_train)
print("Accuracy on training set: {:.3f}".format(
    mlp.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))
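For reference, the same standardization can be done with scikit-learn's StandardScaler; this short sketch is equivalent to the manual version above:

from sklearn.preprocessing import StandardScaler

scaler = StandardScaler().fit(X_train)    # learn mean and std on the training set
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)  # apply the same transformation to the test set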
The model gives a warning that the maximum number of iterations has been reached. This comes from the adam algorithm used to learn the model and tells us we should increase the number of iterations:
mlp = MLPClassifier(max_iter=1000, random_state=0)
mlp.fit(X_train_scaled, y_train)
print("Accuracy on training set: {:.3f}".format(
    mlp.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))
Increasing the number of iterations improves the training-set accuracy, but what we want is better generalization. So we try to decrease the model's complexity by increasing the alpha parameter (over a range from 0.0001 to 1), which adds stronger regularization to the weights. (Note: when I ran this program, the training-set accuracy differed from the book and reached 1.000.)
mlp = MLPClassifier(max_iter=1000, alpha=1, random_state=0)
mlp.fit(X_train_scaled, y_train)
print("Accuracy on training set: {:.3f}".format(
    mlp.score(X_train_scaled, y_train)))
print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))
One way to inspect what the model has learned is to look at its weights. The rows of the first weight matrix correspond to the 30 input features, the columns to the 100 hidden units:
plt.figure(figsize=(20, 5))
plt.imshow(mlp.coefs_[0], interpolation='none', cmap='viridis')
plt.yticks(range(30), cancer.feature_names)
plt.xlabel("Columns in weight matrix")
plt.ylabel("Input feature")
plt.colorbar()
We can infer that a feature whose weights are small for all hidden units may be less important; it may also simply not be represented in a way the neural network can use. We could also visualize the weights connecting the hidden layer to the output layer, but those are even harder to interpret. While MLPClassifier and MLPRegressor provide easy-to-use interfaces for the most common neural network architectures, they capture only a small subset of what is possible with neural networks.
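As a rough follow-up to the heatmap (my own sketch, not from the book), one can rank the input features by the mean absolute weight of their connections into the hidden layer:

import numpy as np

importance = np.abs(mlp.coefs_[0]).mean(axis=1)  # one value per input feature
for i in np.argsort(importance)[::-1][:5]:       # the five largest
    print(cancer.feature_names[i], importance[i])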
The most established deep learning libraries are keras and lasagne (both built on the theano library) and tensorflow. They provide much more flexible interfaces, and these popular deep learning libraries also support GPUs.
3. Strengths, weaknesses, and parameters
The main advantage of neural networks is their ability to capture the information contained in large amounts of data and to build incredibly complex models. Their downsides are that training takes a long time and that they work best with "homogeneous" data, where all features have similar meanings; for data with very different kinds of features, tree-based models may perform better. There are many ways to tune neural network models and to train them.
Estimating the complexity of a neural network: the most important parameters are the number of layers and the number of hidden units per layer. When thinking about model complexity, a useful measure is the number of weights that are learned (see the sketch below). A common way to tune a neural network is to first create a network large enough to overfit, making sure the task can actually be learned, and then either shrink the network or increase alpha to add stronger regularization, which improves generalization.
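A minimal sketch of that counting (n_weights is a hypothetical helper of mine, not a scikit-learn function); each pair of consecutive layers contributes in_size * out_size weights plus out_size biases:

def n_weights(layer_sizes):
    # layer_sizes = [n_features, hidden_1, ..., n_outputs]
    return sum(a * b + b for a, b in zip(layer_sizes, layer_sizes[1:]))

print(n_weights([30, 100, 1]))       # one hidden layer of 100 units: 3201
print(n_weights([30, 100, 100, 1]))  # two hidden layers of 100 units: 13301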
In our experiments we focused on the model definition: the number of layers, the number of nodes per layer, regularization, and the nonlinearity. How the model is learned, that is, the algorithm used for learning the parameters, is set by the solver parameter. It has two easy-to-use options: the default 'adam', which works well in most situations but is quite sensitive to the scaling of the data, and 'lbfgs', which is quite robust but can take a long time on large models or large datasets. There is also the more advanced 'sgd' option.
Keep in mind that fit() resets the model: calling it again discards everything learned before.
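A quick sketch of that behavior; warm_start=True is the MLPClassifier option for continuing from the previously learned weights instead (shown here as an aside, not something the original text covers):

mlp = MLPClassifier(solver='lbfgs', random_state=0)
mlp.fit(X_train, y_train)  # learns a model
mlp.fit(X_train, y_train)  # starts over from scratch, forgetting the first fit
# to continue training from the previous weights on later fit() calls:
mlp = MLPClassifier(solver='lbfgs', random_state=0, warm_start=True)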
Understanding the mglearn library
First, let's look at the library's configuration file:
from . import plots
from . import tools
from .plots import cm3, cm2
from .tools import discrete_scatter
from .plot_helpers import ReBl

__version__ = "0.1.7"
__all__ = ['tools', 'plots', 'cm3', 'cm2', 'discrete_scatter', 'ReBl']
Below is a brief summary; four points so far:
1. The plots module contains many ready-made data visualizations. They are convenient to view and save you the work of building them yourself.
Example: one simple line of code shows how linear regression behaves on a regression problem: mglearn.plots.plot_linear_regression_wave()
Example: one line of code shows the effect of the parameter C on the LinearSVC algorithm: mglearn.plots.plot_linear_svc_regularization()
2. The library's cm2 and cm3 objects are preconfigured colormaps that are easy to use when plotting with pyplot.
This is the source code it maps to:

# create a smooth transition from the first to the second color of cm3
# similar to RdBu but with our red and blue, also not going through white,
# which is really bad for greyscale
cdict = {'red': [(0.0, 0.0, cm2(0)[0]), (1.0, cm2(1)[0], 1.0)],
         'green': [(0.0, 0.0, cm2(0)[1]), (1.0, cm2(1)[1], 1.0)],
         'blue': [(0.0, 0.0, cm2(0)[2]), (1.0, cm2(1)[2], 1.0)]}
ReBl = LinearSegmentedColormap("ReBl", cdict)
Example:
ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
3. The sklearn.datasets module that this library builds on provides functions for loading and fetching common datasets and for generating synthetic ones.
Example: loading the Boston housing dataset (functions with the load prefix all load data bundled with the library):
X, y = mglearn.datasets.load_extended_boston()
Example: generating synthetic data (functions with the make prefix):
from sklearn.datasets import make_moons
X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
4. The plots module adapts matplotlib.pyplot so that it is better suited to plotting classes and clusters.
Example: mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5, ax=ax, alpha=.7)
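Putting these pieces together, a minimal end-to-end sketch (assuming mglearn is installed):

import matplotlib.pyplot as plt
import mglearn
from sklearn.datasets import make_moons
from sklearn.linear_model import LogisticRegression

X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
clf = LogisticRegression().fit(X, y)
mglearn.plots.plot_2d_separator(clf, X, fill=True, eps=0.5, alpha=.4)  # color the plane by class
mglearn.discrete_scatter(X[:, 0], X[:, 1], y)                          # overlay the data points
plt.show()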