《机器学习》 西瓜书的代码实现.
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# Load the watermelon 3.0α table. The bare `data` expression only renders the
# frame when this runs as a notebook cell; as a plain script it is a no-op.
data = pd.read_csv("work/西瓜数据集3.0α.txt")
data

# Partition the rows by the 'Good melon' label ('是' = yes, '否' = no).
yes = data.loc[data['Good melon'] == '是']
no = data.loc[data['Good melon'] == '否']

# Draw both classes on one axis: density (x) against sugar content (y).
fig, ax = plt.subplots(figsize=(12, 8))
ax.scatter(
    x=yes['Density'],
    y=yes['Sugar content'],
    marker='o',
    c='b',
    label='Yes',
)
ax.scatter(
    x=no['Density'],
    y=no['Sugar content'],
    marker='x',
    c='r',
    label='No',
)
ax.legend()
ax.set(xlabel='Density', ylabel='Sugar content')
plt.show()
从上面的散点图可以看出, 两类样本在 "密度"-"含糖率" 平面上没有明确的分类边界.
from sklearn import svm
# Two support-vector classifiers fitted on the same data so their support
# vectors can be compared: one with a linear kernel, one with an RBF kernel.
linear_svc = svm.SVC(kernel='linear')
rbf_svc = svm.SVC(kernel='rbf')

# Label encoding: '是' (good melon) -> +1, '否' (bad melon) -> -1.
temp = {'是': 1, '否': -1}

# Features: the first two columns of the table, as an (n_samples, 2) array.
X = np.array(data.iloc[:, :2])

# Targets: the third column, encoded and kept 1-D with shape (n_samples,).
# - SVC.fit expects a 1-D y; an (n, 1) column vector makes scikit-learn emit
#   DataConversionWarning and flatten it anyway.
# - Series.map is used instead of Series.replace: replacing strings with ints
#   on an object column relies on pandas' deprecated implicit downcasting,
#   whereas map returns an integer Series directly.
y = np.array(data.iloc[:, 2].map(temp))

linear_svc.fit(X, y)
rbf_svc.fit(X, y)

# Inspect the support vectors chosen by each kernel (only the last expression
# is rendered when this runs as a notebook cell).
linear_svc.support_vectors_
rbf_svc.support_vectors_
from sklearn import datasets
# Repeat the kernel comparison on the iris dataset bundled with scikit-learn.
# NOTE: this rebinds X / y and refits the two SVC objects created earlier, so
# their previous fit is discarded.
iris = datasets.load_iris()
X = iris['data']

# Keep the target 1-D, shape (n_samples,): SVC.fit expects that layout and
# emits DataConversionWarning when handed an (n, 1) column vector.
y = iris['target']

linear_svc.fit(X, y)
rbf_svc.fit(X, y)

# Mean accuracy of each model on the very samples it was trained on
# (training accuracy, not a held-out estimate).
linear_svc.score(X, y), rbf_svc.score(X, y)
from sklearn import tree
# Decision-tree classifier with default settings, fitted on the X / y currently
# in scope; `fit` returns the estimator itself, so construction and training
# are chained.
clf = tree.DecisionTreeClassifier().fit(X, y)

# Mean accuracy on the same samples the tree was trained on.
clf.score(X, y)
下面用支持向量回归 (SVR) 做回归: 以 "密度" 作为输入, "含糖率" 作为输出.
# Support-vector regression with an RBF kernel: predict the second column of
# the table from the first one.
rbf_svr = svm.SVR(kernel='rbf')

# Features must be 2-D, so the list indexer `[0]` keeps the single input column
# as an (n_samples, 1) matrix. The target is passed 1-D, shape (n_samples,):
# an (n, 1) column vector makes SVR.fit emit DataConversionWarning.
rbf_svr.fit(np.array(data.iloc[:, [0]]), np.array(data.iloc[:, 1]))

# R^2 of the fitted model on the same samples it was trained on (a training
# score, not a held-out estimate).
rbf_svr.score(np.array(data.iloc[:, [0]]), np.array(data.iloc[:, 1]))
拟合效果挺不错的 (注意: 上面的得分是在训练样本上计算的 R², 并不代表模型的泛化性能).