Code for "Machine Learning" (the Watermelon Book): Chapter 5 Programming Examples

My blog: https://yunist.cn

The neural-network notation used here is not the one from the Watermelon Book but a convention I am more used to (the two are equivalent anyway). I am not fond of the book's formulation: it treats the thresholds as separate quantities, and it also handles the input-layer and hidden-layer parameters separately, which I find cumbersome to write out.
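
For reference, the equivalence is the usual bias trick: absorb the threshold $\theta$ as one extra weight on a constant input of 1,

$f\left(\sum_{i=1}^{n} w_i x_i - \theta\right) = f\left(\boldsymbol{w}'^{\top}\boldsymbol{x}'\right), \quad \boldsymbol{x}' = (x_1, \dots, x_n, 1)^{\top}, \ \boldsymbol{w}' = (w_1, \dots, w_n, -\theta)^{\top}$

which is why the code below appends a column of ones to the inputs.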

In [1]:
import sys
sys.path.append('/home/aistudio/external-libraries')
import numpy as np
np.set_printoptions(suppress=True)  # suppress scientific notation
import pandas as pd
import matplotlib.pyplot as plt
In [2]:
path = "work/西瓜数据集3.0.txt"
data = pd.read_csv(
    path,
    )
data.shape
Out[2]:
(17, 8)

The first six features are categorical, so they need one-hot encoding (the density column is continuous and is appended as-is below).
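
As a quick illustration with toy data (not from the dataset, and assuming a scikit-learn version >= 0.20 that accepts string categories), one-hot encoding turns each categorical column into one 0/1 indicator column per category:

from sklearn import preprocessing
toy = np.array([['green'], ['black'], ['white'], ['green']])
enc_demo = preprocessing.OneHotEncoder()
print(enc_demo.fit_transform(toy).toarray())
# [[0. 1. 0.]   <- categories are sorted: black, green, white
#  [1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]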

In [3]:
from sklearn import preprocessing
enc = preprocessing.OneHotEncoder()
enc.fit(data.iloc[:, :6])
Out[3]:
OneHotEncoder(categorical_features=None, categories=None,
       dtype=<class 'numpy.float64'>, handle_unknown='error',
       n_values=None, sparse=True)
In [4]:
a = np.array(enc.transform(data.iloc[:, :6]).toarray())
b = np.array(data.iloc[:, 6]).reshape(data.shape[0], 1)
In [5]:
X = np.c_[a, b]
In [6]:
enc.fit(data.iloc[:, 7:])
Out[6]:
OneHotEncoder(categorical_features=None, categories=None,
       dtype=<class 'numpy.float64'>, handle_unknown='error',
       n_values=None, sparse=True)
In [7]:
y = np.array(enc.transform(data.iloc[:, 7:]).toarray())

Neural network

By the empirical rule $m = \log_2(n)$ (where $m$ is the number of hidden-layer nodes and $n$ the number of input nodes), with $n = 18$ inputs here we get $\log_2 18 \approx 4.2$, so the hidden layer gets 5 nodes.

In [8]:
hidden_size = 5
input_size = X.shape[1]
num_labels = y.shape[1]
In [9]:
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
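
As written, np.exp(-x) overflows for large negative x; that is the source of the RuntimeWarning that appears during optimization further down. The result still saturates to 0 correctly, so the warning is harmless, but a numerically stable variant is easy if it bothers you. A sketch (not part of the original notebook):

def sigmoid_stable(x):
    # evaluate exp only where its argument is non-positive, so it never overflows
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1 / (1 + np.exp(-x[pos]))
    ex = np.exp(x[~pos])
    out[~pos] = ex / (1 + ex)
    return out
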
In [10]:
def separate_params(params, hidden_size, input_size, num_labels):
    theta1 = np.reshape(params[:hidden_size * (input_size + 1)], (hidden_size, (input_size + 1)))
    theta2 = np.reshape(params[hidden_size * (input_size + 1):], (num_labels, (hidden_size + 1)))
    return [theta1, theta2]
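
A quick shape check (not in the original) makes the packing explicit: with input_size = 18, hidden_size = 5 and num_labels = 2, the flat vector holds 5 * 19 + 2 * 6 = 107 numbers.

demo = np.arange(5 * (18 + 1) + 2 * (5 + 1), dtype=float)  # 107 entries
t1, t2 = separate_params(demo, hidden_size=5, input_size=18, num_labels=2)
print(t1.shape, t2.shape)  # (5, 19) (2, 6)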

Forward propagation

In [11]:
def forward_propagate(params, X, hidden_size, input_size, num_labels):
    theta = separate_params(params, hidden_size, input_size, num_labels)
    n = X.shape[0]
    ones = np.ones([n, 1])
    a_0 = np.c_[X, ones]  # append a column of ones for the bias term
    z_1 = a_0 @ theta[0].T
    a_1 = np.c_[sigmoid(z_1), ones]  # same trick for the hidden layer
    z_2 = a_1 @ theta[1].T
    res = sigmoid(z_2)
    return a_0, z_1, a_1, z_2, res
In [12]:
def create_params(hidden_size, input_size, num_labels):
    return (np.random.random(size=hidden_size * (input_size + 1) + num_labels * (hidden_size + 1)) - 0.5) * 0.25  # subtracting 0.5 makes the random values both positive and negative
In [13]:
params = create_params(hidden_size, input_size, num_labels)

Let's test it on a single example.

In [14]:
forward_propagate(params, X[0:1], hidden_size, input_size, num_labels)
Out[14]:
(array([[0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.697,
         1.   ]]),
 array([[ 0.38519265, -0.15304749, -0.28037218, -0.0890345 , -0.21777067]]),
 array([[0.59512489, 0.46181264, 0.43036253, 0.47775607, 0.44577148,
         1.        ]]),
 array([[0.02009672, 0.0863241 ]]),
 array([[0.50502401, 0.52156763]]))
In [15]:
def cost(params, X, y, hidden_size, input_size, num_labels):
    a_0, z_1, a_1, z_2, y_pre = forward_propagate(params, X, hidden_size, input_size, num_labels)
    return np.sum(((y_pre - y) ** 2)) / 2
In [16]:
cost(params, X[0:1], y[0:1], hidden_size, input_size, num_labels)
Out[16]:
0.24197339111658744
In [17]:
def sigmoid_gradient(z):  # derivative of the sigmoid
    return np.multiply(sigmoid(z), (1 - sigmoid(z)))

Backpropagation

Standard $\mathrm{BP}$ algorithm
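
With the squared-error cost $E = \frac{1}{2}\sum_j (\hat{y}_j - y_j)^2$ and sigmoid outputs, the chain rule gives the output-layer error term

$\delta^{(2)}_j = \frac{\partial E}{\partial z^{(2)}_j} = (\hat{y}_j - y_j)\,\hat{y}_j\,(1 - \hat{y}_j)$

which is exactly the `delta_2 = (y_pre - y) * y_pre * (1 - y_pre)` line in the code below.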

In [18]:
def backprop_one(params, X, y, hidden_size, input_size, num_labels, learning_rate):
    theta = separate_params(params, hidden_size, input_size, num_labels)
    a_0, z_1, a_1, z_2, y_pre = forward_propagate(params, X, hidden_size, input_size, num_labels)
    z_1 = np.c_[z_1, 1]  # pad z_1 with one column: delta_1 should have only 5 columns, but theta[1] has 6; the extra one comes from the appended all-ones bias column
    gradient = [None, None]
    delta_2 = (y_pre - y) * y_pre * (1 - y_pre)
    gradient[1] = delta_2.T @ a_1
    delta_1 = delta_2 @ theta[1] * sigmoid_gradient(z_1)
    gradient[0] = delta_1[:, :-1].T @ a_0  # drop the padded bias column again
    return np.concatenate((np.ravel(gradient[0]), np.ravel(gradient[1]))) * learning_rate
In [19]:
learning_rate = 0.1
backprop_one(params, X[0:1], y[0:1], hidden_size, input_size, num_labels, learning_rate)
Out[19]:
array([ 0.        ,  0.        , -0.00058511,  0.        ,  0.        ,
       -0.00058511,  0.        , -0.00058511,  0.        ,  0.        ,
       -0.00058511,  0.        , -0.00058511,  0.        ,  0.        ,
       -0.00058511,  0.        , -0.00040782, -0.00058511,  0.        ,
        0.        ,  0.00053986,  0.        ,  0.        ,  0.00053986,
        0.        ,  0.00053986,  0.        ,  0.        ,  0.00053986,
        0.        ,  0.00053986,  0.        ,  0.        ,  0.00053986,
        0.        ,  0.00037628,  0.00053986,  0.        ,  0.        ,
       -0.00046619,  0.        ,  0.        , -0.00046619,  0.        ,
       -0.00046619,  0.        ,  0.        , -0.00046619,  0.        ,
       -0.00046619,  0.        ,  0.        , -0.00046619,  0.        ,
       -0.00032493, -0.00046619,  0.        ,  0.        ,  0.00052684,
        0.        ,  0.        ,  0.00052684,  0.        ,  0.00052684,
        0.        ,  0.        ,  0.00052684,  0.        ,  0.00052684,
        0.        ,  0.        ,  0.00052684,  0.        ,  0.00036721,
        0.00052684,  0.        ,  0.        ,  0.00058905,  0.        ,
        0.        ,  0.00058905,  0.        ,  0.00058905,  0.        ,
        0.        ,  0.00058905,  0.        ,  0.00058905,  0.        ,
        0.        ,  0.00058905,  0.        ,  0.00041057,  0.00058905,
        0.00751305,  0.00583007,  0.00543304,  0.00603135,  0.00562756,
        0.01262433, -0.00710493, -0.00551338, -0.00513791, -0.00570372,
       -0.00532187, -0.01193855])
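
Before trusting these numbers, the analytic gradient can be checked against central finite differences of the cost; a minimal sketch, not in the original notebook:

def grad_check(params, X, y, eps=1e-5, n_checks=5):
    # learning_rate=1.0 so backprop_one returns the raw gradient
    analytic = backprop_one(params, X, y, hidden_size, input_size, num_labels, 1.0)
    for i in np.random.choice(len(params), n_checks, replace=False):
        e = np.zeros_like(params)
        e[i] = eps
        numeric = (cost(params + e, X, y, hidden_size, input_size, num_labels)
                   - cost(params - e, X, y, hidden_size, input_size, num_labels)) / (2 * eps)
        print(i, analytic[i], numeric)  # the two columns should agree to ~1e-6

grad_check(params, X[0:1], y[0:1])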

Accumulated $\mathrm{BP}$ algorithm

In [20]:
def backprop_all(params, X, y, hidden_size, input_size, num_labels, learning_rate):
    n = X.shape[0]
    gradient = np.zeros_like(params)  # accumulator for the summed gradient
    for i in range(n):
        temp = backprop_one(params, X[i:i + 1], y[i:i + 1], hidden_size, input_size, num_labels, learning_rate)
        gradient += temp
    c = cost(params, X, y, hidden_size, input_size, num_labels)
    return c, gradient / n
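
Instead of handing the gradient to scipy below, one could also run plain gradient descent with it; a sketch with arbitrarily chosen step size and iteration count (not in the original):

w = create_params(hidden_size, input_size, num_labels)
for epoch in range(1000):
    # learning_rate=1.0 so backprop_all returns the raw averaged gradient
    c, g = backprop_all(w, X, y, hidden_size, input_size, num_labels, 1.0)
    w -= 0.5 * g  # fixed step size, chosen arbitrarily
    if epoch % 200 == 0:
        print(epoch, c)
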
In [21]:
from scipy.optimize import minimize  # optimization routine
fmin = minimize(fun=backprop_all, x0=params, args=(X, y, hidden_size, input_size, num_labels, learning_rate), 
                method='TNC', jac=True, options={'maxiter': 250}, )
fmin
/opt/conda/envs/python35-paddle120-env/lib/python3.7/site-packages/ipykernel_launcher.py:2: RuntimeWarning: overflow encountered in exp
  
Out[21]:
     fun: 0.8571714211551512
     jac: array([-0.        , -0.        ,  0.00000002,  0.00000001, -0.        ,
       -0.        , -0.        , -0.        ,  0.00000001,  0.        ,
        0.00000001, -0.        ,  0.        ,  0.00000001, -0.        ,
        0.        ,  0.00000001,  0.        ,  0.00000001,  0.        ,
       -0.        , -0.        , -0.        ,  0.        , -0.        ,
        0.        ,  0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        , -0.        , -0.00000011,  0.        ,
        0.00000009, -0.        , -0.00000003,  0.        ,  0.        ,
       -0.00000003, -0.        , -0.        ,  0.        , -0.00000003,
       -0.00000003, -0.        ,  0.00000001, -0.00000003,  0.00000001,
       -0.00000001, -0.00000003, -0.        ,  0.        ,  0.        ,
        0.        , -0.        ,  0.        ,  0.        , -0.        ,
        0.        ,  0.        ,  0.        , -0.        ,  0.        ,
        0.        , -0.        ,  0.        , -0.        , -0.        ,
       -0.        ,  0.        , -0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        ,  0.        , -0.        ,
       -0.        , -0.        ,  0.        , -0.        , -0.        ,
        0.        , -0.        , -0.        , -0.        , -0.        ,
       -0.0000004 , -0.00000001, -0.00000044, -0.00000001, -0.00000001,
       -0.00000042, -0.00000016,  0.        , -0.00000016,  0.        ,
        0.        , -0.00000016])
 message: 'Local minimum reached (|pg| ~= 0)'
    nfev: 192
     nit: 23
  status: 0
 success: True
       x: array([  4.10994767,  -3.38262008,  -1.66868714,  -3.24411871,
        -2.46984165,   4.6815752 ,  -1.37843286,   3.78869796,
        -2.78044842,  -3.94449278,  10.80319956,  -6.67314333,
         3.74815023,  -4.94780848,   0.83831717,  -2.53073048,
         1.81723987,   2.07409822,  -0.23213484,  -6.91950787,
         5.73886718,   0.93084272,   5.51585314,   1.34983823,
        -5.59028422,   4.52771083, -10.60830637,   5.11812797,
         7.31446136, -13.47103628,   8.26838664, -10.55176694,
        10.45082016,   0.7237173 ,   0.43538802,   0.15472223,
        -4.55030913,   0.64703824,   7.20100671,  -3.28585527,
        -4.50225427,  -2.06638983,  -0.46444371,   2.3394346 ,
        -0.23825879,   1.82998162,  -2.30311466,  -2.94909435,
         8.44724355,  -5.47169461,   5.67214294,  -4.80113739,
        -1.85767564,   3.3058819 ,  -3.91790413,   1.36641511,
        -0.3051747 ,  -6.3947426 ,   5.94772173,  -0.57688648,
         4.1803336 ,   1.34869659,  -5.7644224 ,   1.92290715,
        -6.68352367,   5.17774734,   7.85376638, -12.06886523,
         6.58416861,  -7.58061773,   9.76257019,  -0.84016948,
        -1.53655026,   1.70572259,  -4.30749631,   0.12413366,
        -6.33839117,   6.1140821 ,  -0.42280632,   4.45988223,
         1.33514281,  -6.32830722,   3.25147091,  -8.00407706,
         4.99825238,   8.14201403, -14.17964567,   8.22992304,
        -8.8590785 ,   9.33319907,  -0.74043123,  -0.84782648,
         0.57699089,  -5.21293203,  -0.31722919, -10.09656673,
        11.60995381,   7.02699677,  -2.31627619,   2.11503571,
         1.27701308,  12.73974517, -10.25452238, -10.19452197,
        -1.36761941,  -4.22721727,  -0.75364835])
In [22]:
cost(fmin.x, X, y, hidden_size, input_size, num_labels)
Out[22]:
0.8571714211551512
In [23]:
forward_propagate(fmin.x, X, hidden_size, input_size, num_labels)
Out[23]:
(array([[0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.697,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.774,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.634,
         1.   ],
        [0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.608,
         1.   ],
        [0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.556,
         1.   ],
        [0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.403,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   ,
         0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.481,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.437,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   ,
         0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.666,
         1.   ],
        [0.   , 0.   , 1.   , 1.   , 0.   , 0.   , 0.   , 0.   , 1.   ,
         0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.243,
         1.   ],
        [0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 0.   , 0.   , 1.   ,
         1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.245,
         1.   ],
        [0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.343,
         1.   ],
        [0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   ,
         0.   , 0.   , 1.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.639,
         1.   ],
        [0.   , 1.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.   ,
         0.   , 0.   , 1.   , 1.   , 0.   , 0.   , 1.   , 0.   , 0.657,
         1.   ],
        [1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 0.   ,
         0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.36 ,
         1.   ],
        [0.   , 1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   ,
         1.   , 0.   , 0.   , 0.   , 1.   , 0.   , 1.   , 0.   , 0.042,
         1.   ],
        [0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.   ,
         0.   , 0.   , 1.   , 0.   , 0.   , 1.   , 1.   , 0.   , 0.103,
         1.   ]]),
 array([[ 20.03571694, -41.3796903 ,  17.73964698, -37.08905704,
         -42.59238406],
        [ 20.80692651, -34.44439748,  27.47988151, -34.63215955,
         -37.6538167 ],
        [ 25.68368357, -48.94337141,  29.35682381, -42.63554089,
         -48.1795542 ],
        [ 14.68399139, -25.83869558,  15.54979562, -28.09925904,
         -30.87288514],
        [ 18.02933616, -35.93007225,  18.76338145, -29.95709186,
         -35.32047223],
        [ 13.71265251, -22.10695852,  -0.21956199, -18.72681303,
         -23.85286739],
        [  2.1767241 ,  -8.5728103 ,  -2.32865879,  -6.22762003,
          -7.76549222],
        [ 15.21383631, -29.83135383,  18.75394314, -27.93339687,
         -31.37050929],
        [ -6.95466889,   6.0020655 ,   3.07967363,  -1.66034886,
           1.10084597],
        [  0.25124771,   8.24064305,  -9.11669256,   7.25803407,
           3.18190089],
        [-20.55419971,  34.10573032, -12.0701126 ,  30.45438604,
          30.60520579],
        [ -1.50812746,   6.54656254, -10.62106923,  11.46849723,
           7.72863693],
        [ -4.71234048, -12.436227  ,   0.93757843, -11.07306942,
         -12.21701526],
        [-11.55607046,   7.4259091 ,   0.11033249,   3.98043467,
           5.48158836],
        [ 19.4021011 , -29.76164582,  11.42494314, -24.35944681,
         -29.54429616],
        [ -6.48040138,   8.19687138,  -3.80857415,   9.52278077,
           7.8729121 ],
        [ -6.74960416,   9.47411769,  -6.58900075,  -0.53049131,
           2.28786152]]),
 array([[1.        , 0.        , 0.99999998, 0.        , 0.        ,
         1.        ],
        [1.        , 0.        , 1.        , 0.        , 0.        ,
         1.        ],
        [1.        , 0.        , 1.        , 0.        , 0.        ,
         1.        ],
        [0.99999958, 0.        , 0.99999982, 0.        , 0.        ,
         1.        ],
        [0.99999999, 0.        , 0.99999999, 0.        , 0.        ,
         1.        ],
        [0.99999889, 0.        , 0.44532896, 0.00000001, 0.        ,
         1.        ],
        [0.89813977, 0.00018914, 0.0887771 , 0.00197026, 0.00042394,
         1.        ],
        [0.99999975, 0.        , 0.99999999, 0.        , 0.        ,
         1.        ],
        [0.00095326, 0.99753247, 0.95604647, 0.15971517, 0.75041858,
         1.        ],
        [0.56248358, 0.99973635, 0.00010981, 0.999296  , 0.96014747,
         1.        ],
        [0.        , 1.        , 0.00000573, 1.        , 1.        ,
         1.        ],
        [0.18121647, 0.99856702, 0.0000244 , 0.99998955, 0.99956015,
         1.        ],
        [0.00890374, 0.00000397, 0.71861025, 0.00001552, 0.00000495,
         1.        ],
        [0.00000958, 0.99940474, 0.52755517, 0.98166493, 0.99585454,
         1.        ],
        [1.        , 0.        , 0.99998908, 0.        , 0.        ,
         1.        ],
        [0.00153085, 0.99972456, 0.02169851, 0.99992684, 0.99961922,
         1.        ],
        [0.00116997, 0.99992319, 0.00137352, 0.3704023 , 0.90786673,
         1.        ]]),
 array([[ -1.792557  ,   1.79157503],
        [ -1.79255687,   1.79157484],
        [ -1.79255688,   1.79157485],
        [ -1.79255388,   1.79157131],
        [ -1.79255678,   1.79157474],
        [ -5.69021734,   7.44616687],
        [ -7.16874966,   9.77695707],
        [ -1.79255443,   1.79157178],
        [ 20.78404741, -24.10777177],
        [  6.92162523,  -9.26609767],
        [ 12.68576666, -16.60306579],
        [ 10.83869072, -14.27803628],
        [  6.23680844,  -7.96618787],
        [ 16.41954852, -21.93235492],
        [ -1.79263357,   1.79168612],
        [ 12.81891177, -16.80017659],
        [ 13.94613072, -15.35079968]]),
 array([[0.14275951, 0.85712027],
        [0.14275953, 0.85712025],
        [0.14275953, 0.85712025],
        [0.1427599 , 0.85711982],
        [0.14275954, 0.85712024],
        [0.00336748, 0.99941667],
        [0.00076969, 0.99994326],
        [0.14275983, 0.85711987],
        [1.        , 0.        ],
        [0.99901475, 0.00009457],
        [0.99999691, 0.00000006],
        [0.99998038, 0.00000063],
        [0.99804773, 0.00034688],
        [0.99999993, 0.        ],
        [0.14275014, 0.85713388],
        [0.99999729, 0.00000005],
        [0.99999912, 0.00000022]]))

The results look good.

Handwritten digit recognition

In [24]:
from scipy.io import loadmat
data = loadmat('/home/aistudio/data/data20138/ex4data1.mat')
data
Out[24]:
{'__header__': b'MATLAB 5.0 MAT-file, Platform: GLNXA64, Created on: Sun Oct 16 13:09:09 2011',
 '__version__': '1.0',
 '__globals__': [],
 'X': array([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]]),
 'y': array([[10],
        [10],
        [10],
        ...,
        [ 9],
        [ 9],
        [ 9]], dtype=uint8)}
In [25]:
X = np.array(data['X'])
y_prev = data['y']
y = enc.fit_transform(data['y']).toarray()
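
To see what the inputs are, a row can be reshaped back into an image; a sketch assuming the usual 20x20, column-major (MATLAB) layout of this dataset:

plt.imshow(X[0].reshape(20, 20, order='F'), cmap='gray')  # order='F': the .mat file stores pixels column-major
plt.title('label: {}'.format(y_prev[0, 0]))  # the file uses label 10 for digit 0
plt.show()
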
In [26]:
def backprop_one(params, X, y, hidden_size, input_size, num_labels, learning_rate):
    theta = separate_params(params, hidden_size, input_size, num_labels)
    a_0, z_1, a_1, z_2, y_pre = forward_propagate(params, X, hidden_size, input_size, num_labels)
    z_1 = np.c_[z_1, 1]  # pad z_1 with one column: delta_1 should have hidden_size columns, but theta[1] has hidden_size + 1 (the bias column)
    gradient = [None, None]
    delta_2 = (y_pre - y) * y_pre * (1 - y_pre)
    gradient[1] = delta_2.T @ a_1
    delta_1 = delta_2 @ theta[1] * sigmoid_gradient(z_1)
    gradient[0] = delta_1[:, :-1].T @ a_0
    return np.concatenate((np.ravel(gradient[0]), np.ravel(gradient[1]))) * learning_rate
In [27]:
hidden_size = 25
num_labels = 10
input_size = X.shape[1]
params = create_params(hidden_size, input_size, num_labels)
learning_rate = 0.1
In [28]:
fmin = minimize(fun=backprop_all, x0=params, args=(X, y, hidden_size, input_size, num_labels, learning_rate), 
                method='TNC', jac=True, options={'maxiter': 250}, )
fmin
Out[28]:
     fun: 58.71480112541169
     jac: array([ 0.        ,  0.        ,  0.        , ..., -0.00000151,
       -0.00000187, -0.00000151])
 message: 'Max. number of function evaluations reached'
    nfev: 250
     nit: 23
  status: 3
 success: False
       x: array([ 0.02183185,  0.0706242 , -0.0751135 , ..., -0.52321701,
       -0.46704909,  1.23747671])
In [29]:
cost(fmin.x, X, y, hidden_size, input_size, num_labels)
Out[29]:
58.71480112541169
In [30]:
a_0, z_1, a_1, z_2, y_pre = forward_propagate(fmin.x, X, hidden_size, input_size, num_labels)
y_pred = np.argmax(y_pre, axis=1) + 1  # argmax gives 0..9; labels in this file run 1..10 (10 encodes digit 0)
In [31]:
correct = [1 if a == b else 0 for (a, b) in zip(y_pred, y_prev)]
accuracy = (sum(map(int, correct)) / float(len(correct)))
print('accuracy = {0}%'.format(accuracy * 100))
accuracy = 98.26%

$\mathrm{RBF}$ network

The task here is the XOR problem, so we first construct the data by hand.

In [37]:
X = np.array([[1, 0], [1, 1], [0, 0], [0, 1]])
y = np.array([[1], [0], [0], [1]])
In [102]:
def gaussian_radial_base(X, c):  # Gaussian radial basis function
    res = np.ones([X.shape[0], c.shape[0]])
    for i in range(c.shape[0]):
        res[:, i] = np.exp(-np.sum((X - c[i]) ** 2, axis=1))
    return res
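
In the book's notation this computes the Gaussian radial basis $\rho(\boldsymbol{x}, \boldsymbol{c}_i) = e^{-\beta_i \lVert \boldsymbol{x} - \boldsymbol{c}_i \rVert^2}$; all the widths $\beta_i$ are fixed to 1 here.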

The centers here are simply picked by hand...

In [111]:
c = np.array([[1, 1], [0, 0]])
c
Out[111]:
array([[1, 1],
       [0, 0]])
In [108]:
gaussian_radial_base(np.array([[1, 1], [0, 0]]), c)
Out[108]:
array([[1.        , 0.13533528],
       [0.13533528, 1.        ]])

Rewriting forward and backward propagation

In [290]:
hidden_size = 2
num_labels = 1
input_size = X.shape[1]
params = (np.random.random(hidden_size + 1) - 0.5) * 0.25
learning_rate = 0.1
In [291]:
def forward_propagate(params, X, hidden_size, input_size, num_labels, c):
    theta = params.reshape(1, hidden_size + 1)
    n = X.shape[0]
    a_0 = X
    z_1 = gaussian_radial_base(a_0, c)
    a_1 = np.c_[z_1, np.ones([n, 1])]  # append a bias column
    z_2 = a_1 @ theta.T
    res = sigmoid(z_2)
    return a_0, z_1, a_1, z_2, res
In [292]:
def backprop_one(params, X, y, hidden_size, input_size, num_labels, learning_rate, c):
    theta = params.reshape(1, hidden_size + 1)
    a_0, z_1, a_1, z_2, y_pre = forward_propagate(params, X, hidden_size, input_size, num_labels, c)
    delta_2 = (y_pre - y) * y_pre * (1 - y_pre)
    gradient = delta_2.T @ a_1  # the layer before this one has no trainable parameters (the centers are fixed)
    return gradient * learning_rate
In [293]:
backprop_one(params, X[0:1], y[0:1], hidden_size, input_size, num_labels, learning_rate, c)
Out[293]:
array([[-0.00454241, -0.00454241, -0.01234755]])
In [294]:
def backprop_all(params, X, y, hidden_size, input_size, num_labels, learning_rate, c):
    a_0, z_1, a_1, z_2, y_pre = forward_propagate(params, X, hidden_size, input_size, num_labels, c)
    cost = np.sum((y - y_pre) ** 2)
    gradient = 0
    for i in range(X.shape[0]):
        gradient += backprop_one(params, X[i:i + 1], y[i:i + 1], hidden_size, input_size, num_labels, learning_rate, c)
    return cost, gradient.ravel()
In [295]:
backprop_all(params, X, y, hidden_size, input_size, num_labels, learning_rate, c)
Out[295]:
(1.0045513176884362, array([0.00562419, 0.00500978, 0.00067503]))
In [298]:
gmin = minimize(fun=backprop_all, x0=params, args=(X, y, hidden_size, input_size, num_labels, learning_rate, c), 
                method='TNC', jac=True, options={'maxiter': 250}, )
gmin
Out[298]:
     fun: 8.118343737216682e-06
     jac: array([0.00000011, 0.00000075, 0.00000075])
 message: 'Local minimum reached (|pg| ~= 0)'
    nfev: 17
     nit: 5
  status: 0
 success: True
       x: array([-37.89014423, -35.62718282,  34.86700206])
In [303]:
a_0, z_1, a_1, z_2, y_pre = forward_propagate(gmin.x, X, hidden_size, input_size, num_labels, c)
y_pre
Out[303]:
array([[0.99959914],
       [0.00039165],
       [0.0027647 ],
       [0.99959914]])

The outputs are essentially correct: the XOR problem is solved.
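
Thresholding the outputs at 0.5 recovers the exact truth table:

print((y_pre > 0.5).astype(int).ravel())  # [1 0 0 1], matching y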