1 Least Squares Regression

Reference: HappyWang: Machine Learning (1) -- Linear Regression and Polynomial Fitting (zhuanlan.zhihu.com)

from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
import matplotlib.pyplot as plt
import numpy as np

# Define the number of samples and features
num_sample=1000
num_feature=1
weight=-3.4
b_true=4.3
feature=np.random.normal(size=(num_sample,num_feature))
# y = w*x + b plus Gaussian noise; ravel() keeps the targets 1-D, as sklearn expects
label=(weight*feature+b_true+np.random.normal(size=(num_sample,num_feature))).ravel()

# Split the data into training/testing sets
X_train = feature[:-100,:]
X_test = feature[-100:,:]

# Split the targets into training/testing sets
y_train = label[:-100]
y_test = label[-100:]
reg=linear_model.LinearRegression()
reg.fit(X_train,y_train)
y_predict=reg.predict(X_test)
print("mean squared error: %.2f"%mean_squared_error(y_test,y_predict))
print("coefficient of determination: %.2f"%r2_score(y_test,y_predict))
print("coefficient of the model: %.2f"%reg.coef_[0])
# Plot outputs
plt.scatter(X_test, y_test,  color='black')
plt.plot(X_test, y_predict, color='blue', linewidth=3)

plt.xticks(())
plt.yticks(())

plt.show()

2 Ridge Regression

Ridge regression adds an L2 regularization term to the least-squares objective; the L2 term helps prevent overfitting:

\min_{w,b} \sum_{i=1}^{m}\Big(y_i - \sum_{j=1}^{n} w_j x_{ij} - b\Big)^2 + \alpha \sum_{j=1}^{n} w_j^2

where i indexes samples and j indexes features. In sklearn this is linear_model.Ridge(), which accepts 8 keyword parameters (all with defaults; see the official API docs). Two are passed here: alpha, the α in the expression above, and fit_intercept, a boolean (default True) that tells the model to fit an intercept b.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
# Define the number of samples and features
num_sample=1000
num_feature=1
weight=-3.4
b_true=4.3
feature=np.random.normal(size=(num_sample,num_feature))
# y = w*x + b plus Gaussian noise; ravel() keeps the targets 1-D, as sklearn expects
label=(weight*feature+b_true+np.random.normal(size=(num_sample,num_feature))).ravel()

# Split the data into training/testing sets
X_train = feature[:-100,:]
X_test = feature[-100:,:]

# Split the targets into training/testing sets
y_train = label[:-100]
y_test = label[-100:]

# Create the Ridge model
reg=linear_model.Ridge(alpha=0.5,fit_intercept=True)
# Fit the model on the training set
reg.fit(X_train,y_train)
# Predict on the test set
y_predict=reg.predict(X_test)
# Compute the mean squared error
print("mean squared error: %.2f"%mean_squared_error(y_test,y_predict))
# Compute R^2: its maximum is 1, and the closer to 1, the better the fit
print("coefficient of determination: %.2f"%r2_score(y_test,y_predict))
print("coefficient of the model: %.2f"%reg.coef_[0])
print("intercept of the model: %.2f"%reg.intercept_)

ax=plt.subplot(111)
ax.scatter(X_test,y_test)
ax.plot(X_test,y_predict)
# axis labels
ax.set_ylabel('Y')
ax.set_xlabel('X')

plt.show()

3 Ridge Regression with Cross-Validation

Cross-validated ridge regression trains over multiple candidate α values and keeps the one that performs best. In sklearn this is linear_model.RidgeCV(); its main parameters are alphas and cv (see the official API docs).

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
# Define the number of samples and features
num_sample=1000
num_feature=1
weight=-3.4
b_true=4.3
# Generate the sample data
feature=np.random.normal(size=(num_sample,num_feature))
# y = w*x + b plus Gaussian noise; ravel() keeps the targets 1-D, as sklearn expects
label=(weight*feature+b_true+np.random.normal(size=(num_sample,num_feature))).ravel()

# Split the data into training/testing sets
X_train = feature[:-100,:]
X_test = feature[-100:,:]

# Split the targets into training/testing sets
y_train = label[:-100]
y_test = label[-100:]

reg=linear_model.RidgeCV(alphas=[0.1,0.3,0.5,0.7,1],fit_intercept=True)
reg.fit(X_train,y_train)
y_predict=reg.predict(X_test)
print("mean squared error: %.2f"%mean_squared_error(y_test,y_predict))
print("coefficient of determination: %.2f"%r2_score(y_test,y_predict))
print("coefficient of the model: %.2f"%reg.coef_[0])
print("intercept of the model: %.2f"%reg.intercept_)
print("the optimal alpha is: %.2f"%reg.alpha_)
ax=plt.subplot(111)
ax.scatter(X_test,y_test)
ax.plot(X_test,y_predict)
# axis labels
ax.set_ylabel('Y')
ax.set_xlabel('X')

plt.show()

4 Lasso Regression

Lasso regression adds an L1 regularization term to the least-squares objective; like the L2 term, the L1 term helps prevent overfitting:

\min_{w,b} \sum_{i=1}^{m}\Big(y_i - \sum_{j=1}^{n} w_j x_{ij} - b\Big)^2 + \alpha \sum_{j=1}^{n} |w_j|

In sklearn this is linear_model.Lasso(), which accepts 11 keyword parameters (all with defaults; see the official API docs). Two are passed here: alpha, the α in the expression above, and fit_intercept, a boolean (default True) that tells the model to fit an intercept b.

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
# Define the number of samples and features
num_sample=1000
num_feature=1
weight=-3.4
b_true=4.3
feature=np.random.normal(size=(num_sample,num_feature))
# y = w*x + b plus Gaussian noise; ravel() keeps the targets 1-D, as sklearn expects
label=(weight*feature+b_true+np.random.normal(size=(num_sample,num_feature))).ravel()

# Split the data into training/testing sets
X_train = feature[:-100,:]
X_test = feature[-100:,:]

# Split the targets into training/testing sets
y_train = label[:-100]
y_test = label[-100:]

reg=linear_model.Lasso(alpha=0.5,fit_intercept=True)
reg.fit(X_train,y_train)
y_predict=reg.predict(X_test)
print("mean squared error: %.2f"%mean_squared_error(y_test,y_predict))
print("coefficient of determination: %.2f"%r2_score(y_test,y_predict))
print("coefficient of the model: %.2f"%reg.coef_[0])
print("intercept of the model: %.2f"%reg.intercept_)
ax=plt.subplot(111)
ax.scatter(X_test,y_test)
ax.plot(X_test,y_predict)
# axis labels
ax.set_ylabel('Y')
ax.set_xlabel('X')

plt.show()

5 Lasso Regression with Cross-Validation

Cross-validated Lasso trains over multiple candidate α values and keeps the one that performs best. In sklearn this is linear_model.LassoCV(); its main parameters are alphas and cv (see the official API docs).

import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.metrics import mean_squared_error,r2_score
# Define the number of samples and features
num_sample=1000
num_feature=1
weight=-3.4
b_true=4.3
feature=np.random.normal(size=(num_sample,num_feature))
# y = w*x + b plus Gaussian noise; ravel() keeps the targets 1-D, as sklearn expects
label=(weight*feature+b_true+np.random.normal(size=(num_sample,num_feature))).ravel()

# Split the data into training/testing sets
X_train = feature[:-100,:]
X_test = feature[-100:,:]

# Split the targets into training/testing sets
y_train = label[:-100]
y_test = label[-100:]

reg=linear_model.LassoCV()
reg.fit(X_train,y_train)
y_predict=reg.predict(X_test)
print("mean squared error: %.2f"%mean_squared_error(y_test,y_predict))
print("coefficient of determination: %.2f"%r2_score(y_test,y_predict))
print("coefficient of the model: %.2f"%reg.coef_[0])
print("intercept of the model: %.2f"%reg.intercept_)
print("the optimal alpha is: %.2f"%reg.alpha_)
ax=plt.subplot(111)
ax.scatter(X_test,y_test)
ax.plot(X_test,y_predict)
# axis labels
ax.set_ylabel('Y')
ax.set_xlabel('X')

plt.show()
Applying the LASSO Algorithm in sklearn
Preface: this article introduces the theory behind the LASSO algorithm and how to call and use Lasso in sklearn.
1. Comparing the LASSO and ridge regression algorithms
• Tibshirani (1996) proposed the Lasso (Least Absolute Shrinkage and Selection Operator) algorithm.
• By constructing a first-order (L1) penalty it yields a more parsimonious model: it drives the coefficients of some variables exactly to zero (ridge estimates are almost never exactly zero, which makes variable screening difficult), giving the model strong interpretability.
• It is good at handling data with multicollinearity and, like ridge regression, is a biased estimator.

Both the LASSO cost function and the ridge cost function guard against overfitting. The difference is that ridge regression rarely drives model parameters exactly to zero, whereas LASSO can set many parameters (those that contribute little to the fit) exactly to zero, thereby shrinking the model's feature set.
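To make the contrast concrete, here is a minimal sketch (my illustration, not from the original article) that fits Ridge and Lasso on synthetic data in which only 2 of 10 features carry signal, then counts how many coefficients each model drives to zero:

import numpy as np
from sklearn.linear_model import Ridge, Lasso

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 10))
# only features 0 and 3 carry signal; the remaining 8 are pure noise
y = 3.0 * X[:, 0] - 2.0 * X[:, 3] + rng.normal(scale=0.5, size=200)

ridge = Ridge(alpha=1.0).fit(X, y)
lasso = Lasso(alpha=0.1).fit(X, y)

print("ridge zero coefficients:", np.sum(ridge.coef_ == 0))  # typically 0
print("lasso zero coefficients:", np.sum(lasso.coef_ == 0))  # typically most of the 8 noise features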
2. Example code: applying LASSO in sklearn
import numpy as np
from numpy import genfromtxt
from sklearn import linear_model

# Read the data
data = genfromtxt(r'longley.csv', delimiter=',')

# Split into features and target
x_data = data[1:, 2:]
y_data = data[1:, 1, np.newaxis]

# Train the model
model = linear_model.LassoCV()
model.fit(x_data, y_data.ravel())  # LassoCV expects a 1-D target

# regularization strength (alpha) selected by cross-validation
print(model.alpha_)
# fitted coefficients of the linear model
print(model.coef_)

# prediction for a single sample
print(model.predict(x_data[-2, np.newaxis]))
print(y_data[-2])  # ground-truth value


3. Output of the code
20.03464209711722
[0.10206856 0.00409161 0.00354815 0.         0.         0.        ]
[115.6461414]
[115.7]

As the results show, the LASSO algorithm automatically finds a suitable λ value, and many parameters of the trained model are exactly 0. Features whose parameters are 0 can be treated as unimportant and dropped, which simplifies the model.
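Following this observation, here is a short sketch of how the zero coefficients could drive feature selection; the snippet is my addition (it reuses model and x_data from the code above):

import numpy as np

# indices of the features LassoCV kept (non-zero coefficients)
kept = np.flatnonzero(model.coef_)
print("selected feature indices:", kept)

# reduced design matrix containing only the selected features
x_selected = x_data[:, kept]
print(x_selected.shape)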
4. Data download
Link: https://pan.baidu.com/s/14xi9nAW4DyY3mWFp_GTb0w
Extraction code: 3kf3


LASSO in sklearn
import numpy as np
import matplotlib.pyplot as plt

np.random.seed(42)
x = np.random.uniform(-3.0, 3.0, size=100)
X = x.reshape(-1, 1)
y = 0.5 * x + 3 + np.random.normal(0, 1, size=100)

plt.scatter(x, y)
plt.show()


from sklearn.model_selection import train_test_split

np.random.seed(666)
X_train, X_test, y_train, y_test = train_test_split(X, y)

from sklearn.pipeline import Pipeline
from sklearn.preprocessing import PolynomialFeatures
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression

def PolynomialRegression(degree):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        ("lin_reg", LinearRegression())
    ])

from sklearn.metrics import mean_squared_error

poly_reg = PolynomialRegression(degree=20)
poly_reg.fit(X_train, y_train)

y_predict = poly_reg.predict(X_test)
mean_squared_error(y_test, y_predict)
# output: 167.94010867293571

def plot_model(model):
    X_plot = np.linspace(-3, 3, 100).reshape(100, 1)
    y_plot = model.predict(X_plot)

    plt.scatter(x, y)
    plt.plot(X_plot[:, 0], y_plot, color='r')
    plt.axis([-3, 3, 0, 6])
    plt.show()

plot_model(poly_reg)


from sklearn.linear_model import Lasso

def LassoRegression(degree, alpha):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        ("lasso_reg", Lasso(alpha=alpha))
    ])

lasso1_reg = LassoRegression(20, 0.01)
lasso1_reg.fit(X_train, y_train)

y1_predict = lasso1_reg.predict(X_test)
mean_squared_error(y_test, y1_predict)
# output: 1.1496080843259966

plot_model(lasso1_reg)


lasso2_reg = LassoRegression(20, 0.1)
lasso2_reg.fit(X_train, y_train)

y2_predict = lasso2_reg.predict(X_test)
mean_squared_error(y_test, y2_predict)
# output: 1.1213911351818648

plot_model(lasso2_reg)


lasso3_reg = LassoRegression(20, 1)
lasso3_reg.fit(X_train, y_train)

y3_predict = lasso3_reg.predict(X_test)
mean_squared_error(y_test, y3_predict)
# output: 1.8408939659515595

plot_model(lasso3_reg)
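The alpha values above (0.01, 0.1, 1) were swept by hand, and alpha=1 already underfits (the MSE climbs back to 1.84). As an alternative, cross-validation can pick the penalty inside the same pipeline; the following sketch is my own addition, assuming LassoCV is acceptable in place of Lasso here:

from sklearn.linear_model import LassoCV

def LassoCVRegression(degree, alphas):
    return Pipeline([
        ("poly", PolynomialFeatures(degree=degree)),
        ("std_scaler", StandardScaler()),
        # LassoCV searches the candidate alphas with internal cross-validation
        ("lasso_cv_reg", LassoCV(alphas=alphas, cv=5))
    ])

lasso_cv_reg = LassoCVRegression(20, [0.001, 0.01, 0.1, 1])
lasso_cv_reg.fit(X_train, y_train)

# penalty strength selected by cross-validation
print(lasso_cv_reg.named_steps["lasso_cv_reg"].alpha_)
print(mean_squared_error(y_test, lasso_cv_reg.predict(X_test)))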



LASSO regression is ordinary linear regression with an L1 regularization term added.
Why does LASSO sometimes work worse than plain linear regression? Most often the penalty alpha is set too large, shrinking useful coefficients toward zero, or the features were not standardized before fitting.
from sklearn.linear_model import Lasso

Parameters
alpha: weight of the L1 regularization term
fit_intercept: default True; whether to fit an intercept
normalize: default False; whether to standardize the features
precompute: whether to use a precomputed Gram matrix to speed up computation
max_iter: maximum number of iterations
tol: threshold for deciding that the iterations have converged
warm_start: whether to continue training from the result of the previous fit

Attributes
coef_: the fitted weight of each feature in train_x
n_iter_: number of iterations actually run
intercept_: the fitted intercept term

Methods
fit
predict
score
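As a quick illustration of the parameters, attributes, and methods listed above, here is a minimal sketch of mine on synthetic data (none of the names below come from the article; the parameter values are sklearn's documented defaults except alpha):

import numpy as np
from sklearn.linear_model import Lasso
from sklearn.preprocessing import StandardScaler

rng = np.random.RandomState(0)
X = StandardScaler().fit_transform(rng.normal(size=(100, 5)))
y = 2.0 * X[:, 0] + rng.normal(scale=0.1, size=100)

# set the parameters discussed above explicitly
model = Lasso(alpha=0.05, fit_intercept=True, max_iter=1000, tol=1e-4, warm_start=False)
model.fit(X, y)

print(model.coef_)        # per-feature weights; the four noise features should be near or exactly 0
print(model.intercept_)   # fitted intercept
print(model.n_iter_)      # iterations used by the coordinate-descent solver
print(model.score(X, y))  # R^2 on the training data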

