  • plot_2d_separator
    # Plot decision boundaries for several numbers of neighbors, side by side
    import matplotlib.pyplot as plt
    import mglearn
    from sklearn.neighbors import KNeighborsClassifier

    X, y = mglearn.datasets.make_forge()  # assuming the book's two-feature forge toy dataset

    fig, axes = plt.subplots(1, 3, figsize=(10, 3))  # one figure with three panels
    for neighbors, ax in zip([1, 3, 9], axes):
        knn = KNeighborsClassifier(n_neighbors=neighbors)
        knn.fit(X, y)
        mglearn.plots.plot_2d_separator(knn, X, fill=True, eps=0.5, ax=ax, alpha=.4)  # decision boundary
        mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
        ax.set_title("{} neighbor(s)".format(neighbors))
        ax.set_xlabel("feature 0")
        ax.set_ylabel("feature 1")
    axes[0].legend(loc=3)

    [Figure: k-nearest-neighbors decision boundaries for 1, 3, and 9 neighbors]

    For a two-dimensional dataset, we can also show the prediction for every possible test point in the xy-plane: we color the plane according to the class that would be assigned to a point in that region. This lets us view the decision boundary, i.e. the divide where the algorithm switches between assigning class 0 and class 1.
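
    A minimal sketch of the same idea with plain numpy/matplotlib (assuming knn, X, and y from the snippet above, knn being the last fitted model; mglearn's plot_2d_separator wraps the same steps):

    import numpy as np
    import matplotlib.pyplot as plt

    # dense grid of candidate test points covering the feature plane
    x0, x1 = np.meshgrid(np.linspace(X[:, 0].min() - .5, X[:, 0].max() + .5, 200),
                         np.linspace(X[:, 1].min() - .5, X[:, 1].max() + .5, 200))
    grid = np.c_[x0.ravel(), x1.ravel()]
    pred = knn.predict(grid).reshape(x0.shape)   # predicted class for every grid point

    plt.contourf(x0, x1, pred, alpha=.3)         # color the plane by predicted class
    plt.scatter(X[:, 0], X[:, 1], c=y, edgecolors='k')
    plt.show()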

  • plot_2d_separator source code (mglearn)
    import numpy as np
    import matplotlib.pyplot as plt
    from .plot_helpers import cm2, cm3, discrete_scatter


    def _call_classifier_chunked(classifier_pred_or_decide, X):
        # The chunk_size is used to chunk the large arrays to work with x86
        # memory models that are restricted to < 2 GB in memory allocation. The
        # chunk_size value used here is based on a measurement with the
        # MLPClassifier using the following parameters:
        # MLPClassifier(solver='lbfgs', random_state=0,
        #               hidden_layer_sizes=[1000,1000,1000])
        # by reducing the value it is possible to trade in time for memory.
        # It is possible to chunk the array as the calculations are independent of
        # each other.
        # Note: an intermittent version made a distinction between
        # 32- and 64 bit architectures avoiding the chunking. Testing revealed
        # that even on 64 bit architectures the chunking increases the
        # performance by a factor of 3-5, largely due to the avoidance of memory
        # swapping.
        chunk_size = 10000

        # We use a list to collect all result chunks
        Y_result_chunks = []

        # Call the classifier in chunks.
        for x_chunk in np.array_split(X, np.arange(chunk_size, X.shape[0],
                                                   chunk_size, dtype=np.int32),
                                      axis=0):
            Y_result_chunks.append(classifier_pred_or_decide(x_chunk))

        return np.concatenate(Y_result_chunks)


    def plot_2d_classification(classifier, X, fill=False, ax=None, eps=None,
                               alpha=1, cm=cm3):
        # multiclass
        if eps is None:
            eps = X.std() / 2.

        if ax is None:
            ax = plt.gca()

        x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
        y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
        xx = np.linspace(x_min, x_max, 1000)
        yy = np.linspace(y_min, y_max, 1000)

        X1, X2 = np.meshgrid(xx, yy)
        X_grid = np.c_[X1.ravel(), X2.ravel()]
        decision_values = classifier.predict(X_grid)
        ax.imshow(decision_values.reshape(X1.shape), extent=(x_min, x_max,
                                                             y_min, y_max),
                  aspect='auto', origin='lower', alpha=alpha, cmap=cm)
        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())


    def plot_2d_scores(classifier, X, ax=None, eps=None, alpha=1, cm="viridis",
                       function=None):
        # binary with fill
        if eps is None:
            eps = X.std() / 2.

        if ax is None:
            ax = plt.gca()

        x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
        y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
        xx = np.linspace(x_min, x_max, 100)
        yy = np.linspace(y_min, y_max, 100)

        X1, X2 = np.meshgrid(xx, yy)
        X_grid = np.c_[X1.ravel(), X2.ravel()]
        if function is None:
            function = getattr(classifier, "decision_function",
                               getattr(classifier, "predict_proba"))
        else:
            function = getattr(classifier, function)
        decision_values = function(X_grid)
        if decision_values.ndim > 1 and decision_values.shape[1] > 1:
            # predict_proba
            decision_values = decision_values[:, 1]
        grr = ax.imshow(decision_values.reshape(X1.shape),
                        extent=(x_min, x_max, y_min, y_max), aspect='auto',
                        origin='lower', alpha=alpha, cmap=cm)

        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())
        return grr


    def plot_2d_separator(classifier, X, fill=False, ax=None, eps=None, alpha=1,
                          cm=cm2, linewidth=None, threshold=None,
                          linestyle="solid"):
        # binary?
        if eps is None:
            eps = X.std() / 2.

        if ax is None:
            ax = plt.gca()

        x_min, x_max = X[:, 0].min() - eps, X[:, 0].max() + eps
        y_min, y_max = X[:, 1].min() - eps, X[:, 1].max() + eps
        xx = np.linspace(x_min, x_max, 1000)
        yy = np.linspace(y_min, y_max, 1000)

        X1, X2 = np.meshgrid(xx, yy)
        X_grid = np.c_[X1.ravel(), X2.ravel()]
        if hasattr(classifier, "decision_function"):
            decision_values = _call_classifier_chunked(classifier.decision_function,
                                                       X_grid)
            levels = [0] if threshold is None else [threshold]
            fill_levels = [decision_values.min()] + levels + [
                decision_values.max()]
        else:
            # no decision_function
            decision_values = _call_classifier_chunked(classifier.predict_proba,
                                                       X_grid)[:, 1]
            levels = [.5] if threshold is None else [threshold]
            fill_levels = [0] + levels + [1]
        if fill:
            ax.contourf(X1, X2, decision_values.reshape(X1.shape),
                        levels=fill_levels, alpha=alpha, cmap=cm)
        else:
            ax.contour(X1, X2, decision_values.reshape(X1.shape), levels=levels,
                       colors="black", alpha=alpha, linewidths=linewidth,
                       linestyles=linestyle, zorder=5)

        ax.set_xlim(x_min, x_max)
        ax.set_ylim(y_min, y_max)
        ax.set_xticks(())
        ax.set_yticks(())


    if __name__ == '__main__':
        from sklearn.datasets import make_blobs
        from sklearn.linear_model import LogisticRegression
        X, y = make_blobs(centers=2, random_state=42)
        clf = LogisticRegression(solver='lbfgs').fit(X, y)
        plot_2d_separator(clf, X, fill=True)
        discrete_scatter(X[:, 0], X[:, 1], y)
        plt.show()
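
    The __main__ block above exercises the decision_function branch (LogisticRegression). A classifier without decision_function, such as KNeighborsClassifier, goes through the predict_proba branch instead; a minimal usage sketch via the installed package:

    import matplotlib.pyplot as plt
    import mglearn
    from sklearn.datasets import make_blobs
    from sklearn.neighbors import KNeighborsClassifier

    X, y = make_blobs(centers=2, random_state=42)
    knn = KNeighborsClassifier(n_neighbors=3).fit(X, y)
    mglearn.plots.plot_2d_separator(knn, X, fill=True, alpha=.4)  # uses predict_proba internally
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y)
    plt.show()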
    
  • Neural networks (deep learning)

    Neural networks (deep learning)

    Deep learning algorithms are often carefully tuned to a specific application scenario. Here we only discuss the relatively simple multilayer perceptron (MLP) for classification and regression; MLPs are also known as (vanilla) feed-forward neural networks, or sometimes simply neural networks.

    1. The neural network model

    An MLP can be viewed as a generalization of a linear model.

    display(mglearn.plots.plot_logistic_regression_graph())

    In a linear model, the output is a weighted sum of the input features (input feature × coefficient). In an MLP this process of computing weighted sums is repeated: first the intermediate hidden units are computed as weighted sums of the inputs, and these are then combined with another weighted sum to produce the output.

    display(mglearn.plots.plot_single_hidden_layer_graph())

    This model has many more coefficients (weights) to learn. To make it genuinely more powerful than a linear model, one more trick is needed: after computing the weighted sum for each hidden unit, a nonlinear function is applied to the result, usually the rectifying nonlinearity (rectified linear unit, relu) or the hyperbolic tangent (tanh). The result of this function is then used in the weighted sum that computes the output. Relu cuts off values below zero, while tanh saturates toward -1 for low input values and +1 for high input values; either nonlinearity is what lets the network learn much more complex functions than a linear model can.

    line = np.linspace(-3, 3, 100)
    plt.plot(line, np.tanh(line), label="tanh")
    plt.plot(line, np.maximum(line, 0), label="relu")
    plt.legend(loc="best")
    plt.xlabel("x")
    plt.ylabel("relu(x), tanh(x)")
    plt.show()
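
    To make the repeated weighted sums concrete, here is a minimal NumPy sketch of a one-hidden-layer forward pass (illustrative only: the weights are arbitrary rather than learned, and bias terms are omitted for simplicity):

    import numpy as np

    rng = np.random.RandomState(0)
    x = np.array([1.0, -2.0, 0.5, 3.0])   # 4 input features
    W = rng.randn(3, 4)                   # weights from the inputs to 3 hidden units
    v = rng.randn(3)                      # weights from the hidden units to the output

    h = np.tanh(W @ x)   # weighted sums of the inputs, passed through the tanh nonlinearity
    y_hat = v @ h        # weighted sum of the hidden units gives the output
    print(h, y_hat)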

     

    The weights are all learned from the data; the only thing we need to set is the number of hidden units (nodes), which can be as small as 10 or as large as 10,000.

    mglearn.plots.plot_two_hidden_layer_graph()

    2. Tuning neural networks

    We use MLPClassifier, here on the moons dataset generated by make_moons:

    import matplotlib.pyplot as plt
    import mglearn
    from sklearn.model_selection import train_test_split
    from sklearn.neural_network import MLPClassifier
    from sklearn.datasets import make_moons

    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
    mlp = MLPClassifier(solver='lbfgs', random_state=0).fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()

    The decision boundary learned here is nonlinear but relatively smooth; note that we used solver='lbfgs'. By default the MLP uses 100 hidden nodes; we can reduce this number and still get a good result:

    from sklearn.neural_network import MLPClassifier
    from sklearn.datasets import make_moons
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
    mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10])
    mlp.fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()

    With fewer hidden units the decision boundary becomes more ragged; the default nonlinearity is relu. With a single hidden layer of 10 units, the decision function is made up of 10 straight line segments. The next example adds a second hidden layer of 10 units:

    from sklearn.neural_network import MLPClassifier
    from sklearn.datasets import make_moons
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
    mlp = MLPClassifier(solver='lbfgs', random_state=0, hidden_layer_sizes=[10, 10])
    mlp.fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()

    This time we use the tanh nonlinearity (again with two hidden layers of 10 units each):

    from sklearn.neural_network import MLPClassifier
    from sklearn.datasets import make_moons
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
    mlp = MLPClassifier(solver='lbfgs', activation='tanh', random_state=0, hidden_layer_sizes=[10, 10])
    mlp.fit(X_train, y_train)
    mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3)
    mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train)
    plt.xlabel("Feature 0")
    plt.ylabel("Feature 1")
    plt.show()

     

    Finally, we can also use an L2 penalty to shrink the weights toward zero, just as we did in ridge regression and the linear classifiers. The parameter that controls the L2 penalty in MLPClassifier is alpha (the same name as in the linear regression models), and its default value is very small (weak regularization). The grid below varies both the number of hidden units and alpha:

    from sklearn.neural_network import MLPClassifier
    from sklearn.datasets import make_moons
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, random_state=42)
    fig, axes = plt.subplots(2, 4, figsize=(20, 8))
    for axx, n_hidden_nodes in zip(axes, [10, 100]):
        for ax, alpha in zip(axx, [0.0001, 0.01, 0.1, 1]):
            mlp = MLPClassifier(solver='lbfgs', random_state=0,
                                hidden_layer_sizes=[n_hidden_nodes, n_hidden_nodes], alpha=alpha)
            mlp.fit(X_train, y_train)
            mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
            mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
            ax.set_title("n_hidden=[{}, {}]\nalpha={:.4f}".format(
                n_hidden_nodes, n_hidden_nodes, alpha))
    plt.show()
    

    The ways to control the complexity of a neural network are the number of hidden layers, the number of units in each hidden layer, and the regularization (alpha). The weights of a neural network are set randomly before learning begins, so the random seed affects the model that is learned:

    fig, axes = plt.subplots(2, 4, figsize=(20, 8))
    for i, ax in enumerate(axes.ravel()):
        mlp = MLPClassifier(solver='lbfgs', random_state=i, hidden_layer_sizes=[100, 100])
        mlp.fit(X_train, y_train)
        mglearn.plots.plot_2d_separator(mlp, X_train, fill=True, alpha=.3, ax=ax)
        mglearn.discrete_scatter(X_train[:, 0], X_train[:, 1], y_train, ax=ax)
    plt.show()

    Let's move to different data and apply the MLP to the Breast Cancer dataset:

    from sklearn.datasets import load_breast_cancer
    cancer = load_breast_cancer()
    print("Cancer cata per-feature maxima\n{}".format(cancer.data.max(axis=0)))
    X_train, X_test, y_train, y_test = train_test_split(cancer.data, cancer.target, random_state=0)
    mlp = MLPClassifier(random_state=42)
    mlp.fit(X_train, y_train)
    print("Accuracy on training set: {:.2f}".format(mlp.score(X_train, y_train)))
    print("Accuracy on test set: {:.2f}".format(mlp.score(X_test, y_test)))

    The accuracy of the MLP is quite good, but not as good as that of the other models. As with the SVC seen earlier, this is likely due to the scaling of the data: neural networks also expect all input features to vary on a similar scale, ideally with mean 0 and variance 1. Here we do the rescaling by hand; later we will use StandardScaler to do it automatically.

    # compute the mean value of each feature on the training set
    mean_on_train = X_train.mean(axis=0)
    # compute the standard deviation of each feature on the training set
    std_on_train = X_train.std(axis=0)
    # subtract the mean and divide by the standard deviation
    # afterward, mean=0 and std=1
    X_train_scaled = (X_train - mean_on_train) / std_on_train
    # apply the same transformation (training mean and std) to the test set
    X_test_scaled = (X_test - mean_on_train) / std_on_train

    mlp = MLPClassifier(random_state=0)
    mlp.fit(X_train_scaled, y_train)
    print("Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

    The model gives a warning that the maximum number of iterations has been reached; this comes from the adam algorithm used to learn the model and tells us we should increase the number of iterations:

    mlp = MLPClassifier(max_iter=1000, random_state=0)
    mlp.fit(X_train_scaled, y_train)
    print("Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

    Increasing the number of iterations improves the training set accuracy, but what we really want is better generalization. So we try to decrease the model's complexity; here we increase the alpha parameter (from 0.0001 toward 1) to add stronger regularization of the weights. (Note: when I ran this program, the training set accuracy did not match the book and reached 1.000.)

    mlp = MLPClassifier(max_iter=1000, alpha=1, random_state=0)
    mlp.fit(X_train_scaled, y_train)
    print("Accuracy on training set: {:.3f}".format(mlp.score(X_train_scaled, y_train)))
    print("Accuracy on test set: {:.3f}".format(mlp.score(X_test_scaled, y_test)))

    One way to inspect what the model has learned is to look at its weights. In the heatmap below, the rows correspond to the 30 input features and the columns correspond to the 100 hidden units.

    plt.figure(figsize=(20, 5))
    plt.imshow(mlp.coefs_[0], interpolation='none', cmap='viridis')
    plt.yticks(range(30), cancer.feature_names)
    plt.xlabel("Columns in weight matrix")
    plt.ylabel("Input feature")
    plt.colorbar()

    We can infer that features whose weights are small across all hidden units are "less important" to the model; that may be because they really matter less, or because we did not represent them in a way the network can use. We could also visualize the weights connecting the hidden layer to the output layer, but those are even harder to interpret; a quick sketch of how to plot them anyway follows below.
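
    A minimal sketch of that hidden-to-output visualization (assuming the mlp fitted above; for this binary task mlp.coefs_[1] has shape (100, 1)):

    plt.figure(figsize=(10, 1.5))
    # second weight matrix: rows are hidden units, the single column is the output unit
    plt.imshow(mlp.coefs_[1].T, interpolation='none', cmap='viridis')
    plt.xlabel("Hidden unit")
    plt.yticks(())
    plt.colorbar()
    plt.show()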

    While MLPClassifier and MLPRegressor provide easy-to-use interfaces for the most common neural network architectures, they capture only a small subset of what is possible with neural networks. The most established deep learning libraries are keras, lasagne, and tensorflow (lasagne builds on the theano library); they provide much more flexible interfaces, and these popular deep learning libraries also allow computation on GPUs.

    3. Strengths, weaknesses, and parameters

    Their main strength is the ability to capture the information contained in large amounts of data and to build extremely complex models. Their downsides are that training takes a long time and that they work best on "homogeneous" data where all features have similar meanings; on data with very different kinds of features, tree-based models may perform better. There are also many ways to tune neural network models and to train them.

    Estimating the complexity of a neural network: the most important parameters are the number of layers and the number of hidden units per layer. A useful measure when thinking about a model's complexity is the number of weights that are learned. A common way to tune a neural network is to first create a network that is large enough to overfit, making sure it can actually learn the task; then either shrink the network or increase alpha to strengthen the regularization, which improves generalization.
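
    As a worked example of that weight count (ignoring bias terms): with 100 input features and a single hidden layer of 100 units, there are 100 × 100 = 10,000 weights between the inputs and the hidden layer, plus 100 weights between the hidden layer and a single output, roughly 10,100 in total; adding a second hidden layer of 100 units adds another 100 × 100 = 10,000 weights.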

    In our experiments we focused mainly on the definition of the model: the number of layers, the number of nodes per layer, the regularization, and the nonlinearity. How the model is learned, i.e. the algorithm used to learn the parameters, is set by the solver parameter. There are two easy-to-use options: the default, 'adam', works well in most situations but is quite sensitive to the scaling of the data; the other, 'lbfgs', is quite robust but can take a long time on large models or large datasets. There is also the more advanced option 'sgd'.

    Calling fit() resets everything the model previously learned.

  • Understanding the mglearn library


    First, look at the library's package configuration file (its __init__.py):

    from . import plots
    from . import tools
    from .plots import cm3, cm2
    from .tools import discrete_scatter
    from .plot_helpers import ReBl
    
    __version__ = "0.1.7"
    
    __all__ = ['tools', 'plots', 'cm3', 'cm2', 'discrete_scatter', 'ReBl']
    
    

    Below is my brief understanding of the library, four points for now:

    1: The plots module in this library contains many ready-made data visualizations. They are convenient to look at and save you the work of building the plots yourself.
      Example: a single line of code shows how linear regression behaves on a regression problem.

    mglearn.plots.plot_linear_regression_wave()
    

    [Figure: plot_linear_regression_wave() output]
      Example: a single line of code shows how the size of the parameter C affects the LinearSVC algorithm.

    mglearn.plots.plot_linear_svc_regularization()
    

    [Figure: plot_linear_svc_regularization() output]

    2: cm2 and cm3 in this library are preconfigured color schemes that can be used conveniently when plotting with pyplot.
    This is the relevant source code:

    # create a smooth transition from the first to the second color of cm3
    # similar to RdBu but with our red and blue, also not going through white,
    # which is really bad for greyscale
    
    cdict = {'red': [(0.0, 0.0, cm2(0)[0]),
                     (1.0, cm2(1)[0], 1.0)],
    
             'green': [(0.0, 0.0, cm2(0)[1]),
                       (1.0, cm2(1)[1], 1.0)],
    
             'blue': [(0.0, 0.0, cm2(0)[2]),
                      (1.0, cm2(1)[2], 1.0)]}
    
    ReBl = LinearSegmentedColormap("ReBl", cdict)
    
    

      Example:

        ax.plot(X_train, y_train, '^', c=mglearn.cm2(0), markersize=8)
        ax.plot(X_test, y_test, 'v', c=mglearn.cm2(1), markersize=8)
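
    As a small illustration of the ReBl colormap defined above (assuming mglearn is installed; ReBl is exported at the package top level, as shown in the __init__ earlier):

    import numpy as np
    import matplotlib.pyplot as plt
    import mglearn

    values = np.linspace(0, 1, 256).reshape(16, 16)
    plt.imshow(values, cmap=mglearn.ReBl)   # smooth red-to-blue transition without a white midpoint
    plt.colorbar()
    plt.show()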
    

    3: Through the sklearn.datasets module that this library builds on (and its own datasets module), there are methods for loading commonly used datasets and for generating synthetic data.

      Example: load the (extended) Boston housing dataset (methods whose names contain load all load data already available in the library):

    X, y = mglearn.datasets.load_extended_boston()
    

      Example: generate synthetic data (methods whose names contain make):

    from sklearn.datasets import make_moons
    X, y = make_moons(n_samples=100, noise=0.25, random_state=3)
    

    4: The plots module in this library adapts matplotlib.pyplot, making it better suited to plotting views of classes and clusters.
      Example:

    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5,
                                        ax=ax, alpha=.7)
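
    A runnable version of this example, with hypothetical choices filled in for the undefined clf and ax (here the forge toy data and a k-nearest-neighbors classifier):

    import matplotlib.pyplot as plt
    import mglearn
    from sklearn.neighbors import KNeighborsClassifier

    X, y = mglearn.datasets.make_forge()              # small two-feature toy dataset
    clf = KNeighborsClassifier(n_neighbors=3).fit(X, y)

    fig, ax = plt.subplots()
    mglearn.plots.plot_2d_separator(clf, X, fill=False, eps=0.5, ax=ax, alpha=.7)
    mglearn.discrete_scatter(X[:, 0], X[:, 1], y, ax=ax)
    plt.show()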
    