numpy func
softmax
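Now, we have a vector $x = (x_1, \dots, x_n)$, which is the output of a classification model; however, its values are unnormalized, i.e. $\sum_{i} x_i \neq 1$.
We will use the softmax function to normalize $x$, and the formula is $y_i = \frac{e^{x_i}}{\sum_{j=1}^{n} e^{x_j}}$,
where $y_i \in (0, 1)$ and $\sum_{i} y_i = 1$.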
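In case $X$ is a 2D tensor, i.e. $X \in \mathbb{R}^{m \times n}$ with one sample per row,
after normalization (applied to every row independently), we have $Y_{ij} = \frac{e^{X_{ij}}}{\sum_{k=1}^{n} e^{X_{ik}}}$,
where $\sum_{j} Y_{ij} = 1$ for every row $i$.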
import numpy as np
def softmax(x):
    """Row-wise softmax of a 2D array.

    Every row is normalized independently:
    ``out[i, j] = exp(x[i, j]) / sum_k exp(x[i, k])``.

    inputs
    ------
    x: 2D array-like with shape=[n, dim], unnormalized scores

    return
    ------
    g: 2D np.ndarray (float32) with shape=[n, dim]; each row sums to 1
    """
    # Convert first so nested lists are accepted as well as ndarrays.
    x = np.asarray(x, dtype='float32')
    assert len(x.shape) == 2
    if x.size == 0:
        # Nothing to normalize; also avoids reducing over an empty axis.
        return x.copy()
    # Subtracting the row maximum does not change the result mathematically,
    # but keeps every exponent <= 0 so np.exp cannot overflow.
    shifted = x - np.max(x, axis=1, keepdims=True)
    exp_x = np.exp(shifted)
    # keepdims makes the row sums broadcast back against [n, dim].
    return exp_x / np.sum(exp_x, axis=1, keepdims=True)
# Demo: normalize a random 3x4 score matrix and check that each row sums to 1.
scores = np.random.randn(3, 4)
a = softmax(scores)
print(a)
print(a.sum(axis=1))
[[ 0.13674939 0.06958251 0.45072809 0.34294 ]
[ 0.30497229 0.25565502 0.31585526 0.12351744]
[ 0.39067909 0.2473492 0.07670641 0.28526533]]
[ 1. 1. 1.]
compute mean and std
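- 计算序列均值/方差公式为: $\bar{x}_n = \frac{1}{n}\sum_{i=1}^{n} x_i$, $\sigma_n^2 = \frac{1}{n}\sum_{i=1}^{n} (x_i - \bar{x}_n)^2$
当我们不确定序列长度 n 的值时,对上面公式进行变换有: $\bar{x}_n = \bar{x}_{n-1} + \frac{x_n - \bar{x}_{n-1}}{n}$, $M_n = M_{n-1} + (x_n - \bar{x}_{n-1})(x_n - \bar{x}_n)$, $\sigma_n^2 = \frac{M_n}{n}$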
即通过迭代的方式有: x_mean += dx / n
# Single-pass update of mean and standard deviation; the sequence length
# does not have to be known in advance.
# NOTE(review): x_iter and sqrt are not defined in this snippet - presumably
# any iterable of numbers and math.sqrt / np.sqrt; confirm before running.
n = 0  # number of samples consumed so far
x_mean = 0  # running mean
x_var = 0  # running sum of squared deviations; divided by n only at the end
for x in x_iter:
    n += 1
    dx = x - x_mean  # deviation from the mean *before* this sample
    x_mean += dx / n
    x_var += dx * (x - x_mean)  # mixes the old (dx) and the updated mean
# Divides by n (not n - 1); n is still 0 here if x_iter yielded nothing.
x_std = sqrt(x_var / n)
import numpy as np
def mean_std(x_list):
    """Mean and standard deviation computed in a single pass.

    inputs
    ------
    x_list: iterable of numbers; it is traversed exactly once

    return
    ------
    (mean, std): the running mean and the square root of the accumulated
        squared deviation divided by the number of samples
    """
    count = 0
    mean = 0.
    sq_dev = 0.
    for count, value in enumerate(x_list, 1):
        delta = value - mean  # measured against the mean before the update
        mean += delta / count
        sq_dev += delta * (value - mean)  # old deviation times new deviation
    return mean, np.sqrt(sq_dev / count)
# Demo: the streaming result should agree with numpy's batch mean/std.
x = np.random.randn(1000)
x_mean_1, x_std_1 = np.mean(x), np.std(x)  # reference values from numpy
x_mean_2, x_std_2 = mean_std(x)  # single-pass values
print(x_mean_1, x_std_1)
print(x_mean_2, x_std_2)
(0.042060158790048097, 0.95157457005493618)
(0.042060158790048138, 0.9515745700549364)
l1 vs l2 distance
def circle_points(theta):
    """Map angles to (cos, sin) coordinate pairs.

    inputs
    ------
    theta: 1D array, angle

    return
    ------
    coord: 2D array, row i is (cos(theta[i]), sin(theta[i]))
    """
    cos_sin = np.array([np.cos(theta), np.sin(theta)])  # rows: cos, sin
    return cos_sin.transpose()
def l1_distance(x, y):
    """Pairwise L1 distance between two sets of points.

    inputs
    ------
    x: 2D np.ndarray with shape=[n1, dim]
    y: 2D np.ndarray with shape=[n2, dim]

    return
    ------
    dist: 2D np.ndarray with shape=[n1, n2],
        dist[i, j] = sum_{k}(abs(x[i, k] - y[j, k]))
    """
    assert x.ndim == 2 and y.ndim == 2
    assert x.shape[1] == y.shape[1]
    # [n1, 1, dim] - [1, n2, dim] broadcasts to every (i, j) pair.
    diff = x[:, None, :] - y[None, :, :]
    return np.abs(diff).sum(axis=-1)
# Sample points on the unit circle (their L2 distance to the origin is 1).
n_samples = 100
theta = np.linspace(0.0, 2 * np.pi, n_samples)
l2_coord = circle_points(theta)  # shape=(n_samples, 2)

# For the same angles, use each point's L1 distance to the origin as its
# radius, which gives a second set of 2-D coordinates.
origin = np.array([[0, 0.]])
l1_dist = l1_distance(l2_coord, origin)
radius = l1_dist.ravel()
x = radius * np.cos(theta)
y = radius * np.sin(theta)
l1_coord = np.asarray([x, y]).T  # shape=(n_samples, 2)

# Draw both point sets on one figure.
for coord, label, color in ((l2_coord, 'l2', 'b'), (l1_coord, 'l1', 'r')):
    plt.scatter(coord[:, 0], coord[:, 1], label=label, c=color)
plt.legend()
plt.show()