【机器学习】Sklearn 常用分类器（全）
# Quick-reference snippets: each section imports one scikit-learn classifier,
# builds it, and fits it on train_x / train_y (which must already be defined
# by the caller). Sections are independent of one another.

### KNN Classifier
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Logistic Regression Classifier
from sklearn.linear_model import LogisticRegression

clf = LogisticRegression(penalty='l2')
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Random Forest Classifier
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(n_estimators=8)
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Decision Tree Classifier
from sklearn import tree

clf = tree.DecisionTreeClassifier()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### GBDT(Gradient Boosting Decision Tree) Classifier
# NOTE(review): the import and constructor lines of this section were missing,
# so clf.fit() was refitting the decision tree from the previous section.
# GradientBoostingClassifier(n_estimators=200) is assumed -- confirm.
from sklearn.ensemble import GradientBoostingClassifier

clf = GradientBoostingClassifier(n_estimators=200)
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### AdaBoost Classifier
# NOTE(review): this section's heading, import and constructor were all
# missing; AdaBoostClassifier is an assumption -- confirm against the
# original article.
from sklearn.ensemble import AdaBoostClassifier

clf = AdaBoostClassifier()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### GaussianNB
from sklearn.naive_bayes import GaussianNB

clf = GaussianNB()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Linear Discriminant Analysis
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

clf = LinearDiscriminantAnalysis()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Quadratic Discriminant Analysis
# NOTE(review): this section's heading, import and constructor were all
# missing; QuadraticDiscriminantAnalysis is an assumption -- confirm against
# the original article.
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

clf = QuadraticDiscriminantAnalysis()
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### SVM Classifier
from sklearn.svm import SVC

# probability=True is passed so the fitted model can expose class
# probabilities.
clf = SVC(kernel='rbf', probability=True)
clf.fit(train_x, train_y)
# ----------------------------------------------------------

### Multinomial Naive Bayes Classifier
from sklearn.naive_bayes import MultinomialNB

clf = MultinomialNB(alpha=0.01)
clf.fit(train_x, train_y)


• MATLAB中分类器有：K近邻分类器，随机森林分类器，朴素贝叶斯，集成学习方法，鉴别分析分类器，支持向量机。 现将其主要函数使用方法总结如下，更多细节需参考MATLAB 帮助文件。设： 　训练样本 ：train_data 　...

MATLAB中分类器有：K近邻分类器，随机森林分类器，朴素贝叶斯，集成学习方法，鉴别分析分类器，支持向量机。

现将其主要函数使用方法总结如下，更多细节需参考MATLAB 帮助文件。
设：  　　训练样本 ：train_data  　　训练样本标签：train_label  　　测试样本 ：test_data  　　测试样本标签：test_label
K近邻分类器 （KNN）
% Train a 1-nearest-neighbour model, predict the test set, and display the
% accuracy in percent (the last statement has no semicolon, so it is echoed).
% The option name must use straight single quotes; typographic quotes are
% not valid MATLAB string delimiters.
mdl = ClassificationKNN.fit(train_data,train_label,'NumNeighbors',1);
predict_label = predict(mdl, test_data);
accuracy = length(find(predict_label == test_label))/length(test_label)*100
随机森林分类器（Random Forest）
% Train a TreeBagger ensemble on the training set and predict labels for the
% test set.
% NOTE(review): nTree is not defined in the setup above -- assign it before
% running this line.
B = TreeBagger(nTree,train_data,train_label);  predict_label = predict(B,test_data);
朴素贝叶斯 （Naive Bayes）
% Fit a naive Bayes model on the training set, predict the test set, and
% store the percentage of matching labels in accuracy (not displayed, since
% the statement ends with a semicolon).
nb = NaiveBayes.fit(train_data, train_label);  predict_label = predict(nb, test_data);  accuracy = length(find(predict_label == test_label))/length(test_label)*100;
集成学习方法（Ensembles for Boosting, Bagging, or Random Subspace）
鉴别分析分类器（discriminant analysis classifier）
% Fit a discriminant analysis classifier on the training set and predict
% labels for the test set.
obj = ClassificationDiscriminant.fit(train_data, train_label);  predict_label = predict(obj, test_data);
支持向量机（Support Vector Machine, SVM）
% Train an SVM on the training set and classify the test set; the result is
% displayed because the last statement has no trailing semicolon.
% NOTE(review): svmtrain/svmclassify are legacy functions and are reported
% removed from newer MATLAB releases (fitcsvm/predict are the replacements)
% -- confirm against the target release.
SVMStruct = svmtrain(train_data, train_label);  predict_label = svmclassify(SVMStruct, test_data)
• 常用分类器包括SVM、KNN、贝叶斯、线性回归、逻辑回归、决策树、随机森林、xgboost、GBDT、boosting、神经网络NN。 代码如下： from sklearn.metrics import precision_recall_fscore_support def ...


常用的分类器包括SVM、KNN、贝叶斯、线性回归、逻辑回归、决策树、随机森林、xgboost、GBDT、boosting、神经网络NN。

代码如下：

import functools
import time

from sklearn.metrics import precision_recall_fscore_support

def timeDecor(func):
    """Decorator that reports how long the wrapped function takes to run.

    Each call to the decorated function is timed with ``time.time()`` and
    the elapsed seconds are printed together with the function's name.

    Args:
        func: The callable to wrap.

    Returns:
        A wrapper that forwards all positional and keyword arguments to
        ``func`` and returns whatever ``func`` returns.
    """

    # functools.wraps keeps __name__/__doc__ of the wrapped function, so
    # decorated functions stay identifiable in logs and tracebacks.
    @functools.wraps(func)
    def innerDef(*args, **kwargs):
        t1 = time.time()
        result = func(*args, **kwargs)
        t = time.time() - t1
        # print() call form: the original print statement is Python 2 only.
        print("{0}函数部分运行时间 ：{1}s".format(str(func.__name__), t))
        return result

    return innerDef

@timeDecor
def svm_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``sklearn.svm.SVC`` and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    The original carried a kernel grid (rbf/linear/poly/sigmoid) for a grid
    search that was never wired in; it was unused and is dropped, so the
    classifier runs with scikit-learn's default settings.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn import svm

    t0 = time.time()
    clf = svm.SVC()
    clf.fit(X_train, y_train)
    print("svm done in %0.3fs" % (time.time() - t0))

    # Only the test-set predictions are scored; the unused training-set
    # prediction pass from the original is omitted.
    pre_y_test = clf.predict(X_test)
    print("SVM Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def rf_classify(X_train, y_train, X_test, y_test):
    """Fit a 500-tree random forest and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.ensemble import RandomForestClassifier

    t0 = time.time()
    # random_state is pinned so repeated runs build the same forest.
    clf = RandomForestClassifier(random_state=0, n_estimators=500)
    clf.fit(X_train, y_train)
    print("rf done in %0.3fs" % (time.time() - t0))

    # Only the test-set predictions are scored; the unused training-set
    # prediction pass from the original is omitted.
    pre_y_test = clf.predict(X_test)
    print("rf Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def knn_classify(X_train, y_train, X_test, y_test):
    """Fit a 5-nearest-neighbours classifier and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time.time()
    clf = KNeighborsClassifier(n_neighbors=5)
    clf.fit(X_train, y_train)
    print("knn done in %0.3fs" % (time.time() - t0))

    # Only the test-set predictions are scored; the unused training-set
    # prediction pass from the original is omitted.
    pre_y_test = clf.predict(X_test)
    print("knn Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def bagging_knn_classify(X_train, y_train, X_test, y_test):
    """Fit a bagging ensemble of KNN classifiers and print test-set metrics.

    Each base ``KNeighborsClassifier`` is configured through
    ``BaggingClassifier`` with ``max_samples=0.5`` and ``max_features=0.5``.
    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.ensemble import BaggingClassifier
    from sklearn.neighbors import KNeighborsClassifier

    t0 = time.time()
    clf = BaggingClassifier(
        KNeighborsClassifier(),
        max_samples=0.5,
        max_features=0.5,
    )
    clf.fit(X_train, y_train)
    print("bagging_knn done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("bagging_knn Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def lr_classify(X_train, y_train, X_test, y_test):
    """Fit a logistic regression (``C=1e5``) and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.linear_model import LogisticRegression

    t0 = time.time()
    clf = LogisticRegression(C=1e5)
    clf.fit(X_train, y_train)
    print("lr done in %0.3fs" % (time.time() - t0))

    # Only the test-set predictions are scored; the unused training-set
    # prediction pass from the original is omitted.
    pre_y_test = clf.predict(X_test)
    print("lr Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def nb_classify(X_train, y_train, X_test, y_test):
    """Fit a Gaussian naive Bayes classifier and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.naive_bayes import GaussianNB

    t0 = time.time()
    clf = GaussianNB()
    clf.fit(X_train, y_train)
    print("nb done in %0.3fs" % (time.time() - t0))

    # Only the test-set predictions are scored; the unused training-set
    # prediction pass from the original is omitted.
    pre_y_test = clf.predict(X_test)
    print("nb Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def da_classify(X_train, y_train, X_test, y_test):
    """Fit a discriminant-analysis classifier and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    # NOTE(review): the imported name and the constructor call were missing
    # from this function (it read ``clf = clf.fit(...)`` on an unbound name).
    # QuadraticDiscriminantAnalysis is assumed; LinearDiscriminantAnalysis is
    # the other candidate -- confirm which one was intended.
    from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis

    t0 = time.time()
    clf = QuadraticDiscriminantAnalysis()
    clf.fit(X_train, y_train)
    print("da done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("da Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def decisionTree_classify(X_train, y_train, X_test, y_test):
    """Fit a depth-5 decision tree and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    from sklearn.tree import DecisionTreeClassifier

    t0 = time.time()
    clf = DecisionTreeClassifier(max_depth=5)
    clf.fit(X_train, y_train)
    print("DT done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("DT Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def xgboost_classify(X_train, y_train, X_test, y_test):
    """Fit a default ``xgboost.XGBClassifier`` and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    # Imported locally so the module loads even where xgboost is absent.
    import xgboost

    t0 = time.time()
    clf = xgboost.XGBClassifier()
    clf.fit(X_train, y_train)
    print("xgboost done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("xgboost Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def GBDT_classify(X_train, y_train, X_test, y_test):
    """Fit a gradient-boosted tree classifier and print its test-set metrics.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    # NOTE(review): the imported name and the head of the constructor call
    # were missing (only ``= 200)`` survived). GradientBoostingClassifier with
    # n_estimators=200 is assumed from the function name -- confirm.
    from sklearn.ensemble import GradientBoostingClassifier

    t0 = time.time()
    clf = GradientBoostingClassifier(n_estimators=200)
    clf.fit(X_train, y_train)
    print("GBDT done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("GBDT Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))

@timeDecor
def voting_classify(X_train, y_train, X_test, y_test):
    """Fit a soft-voting ensemble and print its test-set metrics.

    Only the random forest (``'rf'``) is enrolled in the ensemble. The
    original also prepared GBDT, logistic regression, Gaussian NB and
    XGBoost members, but every one of them was commented out of the
    estimator list; those unused objects and their imports are dropped so
    the function no longer requires xgboost to be installed.

    Prints the fit time, then the result of
    ``precision_recall_fscore_support(y_test, predictions)``.

    Args:
        X_train: Training features passed to ``clf.fit``.
        y_train: Training labels passed to ``clf.fit``.
        X_test: Features passed to ``clf.predict``.
        y_test: True labels the predictions are scored against.

    Returns:
        None. Results are only printed.
    """
    # The sklearn.ensemble import list was missing from this function; the
    # two names below are the ones the body actually uses.
    from sklearn.ensemble import RandomForestClassifier, VotingClassifier

    t0 = time.time()
    forest = RandomForestClassifier(random_state=0, n_estimators=500)
    clf = VotingClassifier(
        estimators=[
            ('rf', forest),
        ],
        voting='soft',
    )
    clf.fit(X_train, y_train)
    print("voting done in %0.3fs" % (time.time() - t0))

    pre_y_test = clf.predict(X_test)
    print("voting Metrics : {0}".format(
        precision_recall_fscore_support(y_test, pre_y_test)))


...