In [1]:
import numpy as np

ndarray

In [2]:
np.ones([5,5])
Out[2]:
array([[ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.],
       [ 1.,  1.,  1.,  1.,  1.]])
In [3]:
np.zeros((2,2,2))
Out[3]:
array([[[ 0.,  0.],
        [ 0.,  0.]],

       [[ 0.,  0.],
        [ 0.,  0.]]])
In [4]:
arr1 = np.ones([5,5])
arr1.shape
Out[4]:
(5L, 5L)

Dtype

In [5]:
arr1.dtype
Out[5]:
dtype('float64')
In [6]:
np.empty((3,2),dtype=np.float64)
Out[6]:
array([[  2.33591471e-316,   3.53227214e-316],
       [  5.29729280e-316,   5.17169855e-316],
       [  5.17171001e-316,   3.64236538e-316]])
In [7]:
np.arange(15) #type of result is ndarray
Out[7]:
array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])
In [8]:
# Build a 1-D float ndarray directly from a list literal.
# np.asarray([...]) would also work for this list input.
a = np.array([1.3, 2.5, 3.7, 8.4, 5.8, 6.9])
In [9]:
np.eye(3)
Out[9]:
array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])
In [10]:
a.astype(np.int8)
Out[10]:
array([1, 2, 3, 8, 5, 6], dtype=int8)

ndarray的运算

In [11]:
a*a
Out[11]:
array([  1.69,   6.25,  13.69,  70.56,  33.64,  47.61])
In [12]:
1/a
Out[12]:
array([ 0.76923077,  0.4       ,  0.27027027,  0.11904762,  0.17241379,
        0.14492754])

索引与切片

In [13]:
# A basic slice of an ndarray is a view: writing through the slice
# modifies the array it was taken from.
a = [1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
print(a)  # single-argument print(...) prints the same text on Python 2 and 3
a = np.array(a)
a_slice = a[2:5]   # refers to a[2], a[3], a[4]; no data is copied
a_slice[1] = 15    # writes through to a[3]
print(a)
[1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
[  1.3   2.5   3.7  15.    5.8   6.9]
In [14]:
# Taking an explicit .copy() of the slice detaches it from the source array,
# so the assignment below leaves `a` unchanged.
a = [1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
print(a)  # single-argument print(...) prints the same text on Python 2 and 3
a = np.array(a)  # must be an ndarray here: Python 2 lists have no .copy() method
a_slice = a[2:5].copy()
a_slice[1] = 15  # only the copy is modified
print(a)
[1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
[ 1.3  2.5  3.7  8.4  5.8  6.9]
In [15]:
# Contrast with ndarray slicing: `a` is deliberately left as a plain Python
# list here (no np.array conversion). Slicing a list creates a new list, so
# assigning into the slice does not touch the original.
a = [1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
print(a)  # single-argument print(...) prints the same text on Python 2 and 3
a_slice = a[2:5]
a_slice[1] = 15  # modifies the new list only
print(a)
[1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
[1.3, 2.5, 3.7, 8.4, 5.8, 6.9]
In [16]:
# Two equivalent ways to read one element of a 2-D array:
# chained indexing (row first, then column) and a single (row, column) index.
print(arr1[3][4])
print(arr1[3, 4])
1.0
1.0

布尔型索引

In [17]:
np.random.randn(4,4)
Out[17]:
array([[-0.53789766,  0.1786269 ,  0.18696044, -0.47223838],
       [-0.15807101,  0.58647865,  0.37101347,  0.01853015],
       [-1.08283603,  0.09306486, -0.33948372, -0.21062796],
       [ 0.36399429, -0.84222661, -2.53475439,  0.46601569]])
In [18]:
# Import only the name that is needed. A star import from numpy.random would
# also pull in names such as `bytes`, `random` and `seed`, shadowing the
# builtin `bytes` and hiding where names come from.
from numpy.random import randn

# 6x3 array of samples from the standard normal distribution.
d = randn(6, 3)
print(d)  # single-argument print(...) prints the same text on Python 2 and 3
[[-1.588765    0.7900994  -1.90495055]
 [ 0.98120577 -1.64354516 -0.61113784]
 [ 0.44174428  0.25765596  0.43273007]
 [ 0.62410893  0.2818092   0.26686234]
 [ 0.03870905  0.02795682 -0.26961556]
 [-1.40377426  1.03351871  0.0959984 ]]
In [19]:
names = np.array(['A','B','c','A','a','D'])
names == 'A'
Out[19]:
array([ True, False, False,  True, False, False], dtype=bool)
In [20]:
d[names == 'A'] #output row 1 and 4 only
Out[20]:
array([[-1.588765  ,  0.7900994 , -1.90495055],
       [ 0.62410893,  0.2818092 ,  0.26686234]])
In [21]:
d[names == 'A',2]
Out[21]:
array([-1.90495055,  0.26686234])
In [22]:
d[names == 'A', 1:2]
Out[22]:
array([[ 0.7900994],
       [ 0.2818092]])
In [23]:
d[names != 'A']
Out[23]:
array([[ 0.98120577, -1.64354516, -0.61113784],
       [ 0.44174428,  0.25765596,  0.43273007],
       [ 0.03870905,  0.02795682, -0.26961556],
       [-1.40377426,  1.03351871,  0.0959984 ]])
In [24]:
d[~(names == 'A')]
Out[24]:
array([[ 0.98120577, -1.64354516, -0.61113784],
       [ 0.44174428,  0.25765596,  0.43273007],
       [ 0.03870905,  0.02795682, -0.26961556],
       [-1.40377426,  1.03351871,  0.0959984 ]])

花式索引(Fancy indexing)

In [25]:
# Build an 8x4 float array in which every entry of row i equals i.
arr = np.empty((8, 4))
# Broadcast the column vector 0..7 across the four columns. Every element is
# overwritten, so none of np.empty's uninitialised values remain.
arr[...] = np.arange(8).reshape(8, 1)
arr
Out[25]:
array([[ 0.,  0.,  0.,  0.],
       [ 1.,  1.,  1.,  1.],
       [ 2.,  2.,  2.,  2.],
       [ 3.,  3.,  3.,  3.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.],
       [ 6.,  6.,  6.,  6.],
       [ 7.,  7.,  7.,  7.]])
In [26]:
arr[[1, 7, 2, 4, 5]]
Out[26]:
array([[ 1.,  1.,  1.,  1.],
       [ 7.,  7.,  7.,  7.],
       [ 2.,  2.,  2.,  2.],
       [ 4.,  4.,  4.,  4.],
       [ 5.,  5.,  5.,  5.]])
In [27]:
arr[[-1, -2, -3]]
Out[27]:
array([[ 7.,  7.,  7.,  7.],
       [ 6.,  6.,  6.,  6.],
       [ 5.,  5.,  5.,  5.]])
In [28]:
arr = np.arange(32).reshape((8,4))
arr
Out[28]:
array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])
In [29]:
arr[[1,5,7,2],[0,3,1,2]]
Out[29]:
array([ 4, 23, 29, 10])
In [30]:
arr[[1,5,7,2]][:,[0,3,1,2]]
Out[30]:
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])
In [31]:
arr[np.ix_([1,5,7,2],[0,3,1,2])]
Out[31]:
array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])
  • 花式索引与切片不同,它总是将数据复制到新数组中。

数组转置和轴对换

In [32]:
# Show the array, a separator line, then its transpose (rows and columns swapped).
print(arr)
print('-' * 20)
print(arr.T)
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]
 [16 17 18 19]
 [20 21 22 23]
 [24 25 26 27]
 [28 29 30 31]]
--------------------
[[ 0  4  8 12 16 20 24 28]
 [ 1  5  9 13 17 21 25 29]
 [ 2  6 10 14 18 22 26 30]
 [ 3  7 11 15 19 23 27 31]]
In [33]:
np.dot(arr.T,arr)
Out[33]:
array([[2240, 2352, 2464, 2576],
       [2352, 2472, 2592, 2712],
       [2464, 2592, 2720, 2848],
       [2576, 2712, 2848, 2984]])

对于高维数组,transpose需要得到一个由轴编号组成的元组才能对这些轴进行转置

In [34]:
arr = np.arange(16).reshape((2,2,4))
arr
Out[34]:
array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])
In [35]:
arr.transpose((1,0,2))
Out[35]:
array([[[ 0,  1,  2,  3],
        [ 8,  9, 10, 11]],

       [[ 4,  5,  6,  7],
        [12, 13, 14, 15]]])
In [36]:
arr.swapaxes(1,2)
Out[36]:
array([[[ 0,  4],
        [ 1,  5],
        [ 2,  6],
        [ 3,  7]],

       [[ 8, 12],
        [ 9, 13],
        [10, 14],
        [11, 15]]])

通用函数:快速的元素级数组函数

部分一元ufunc函数:

一元ufunc函数 说明
abs、fabs 计算绝对值,后者只适用于非复数值但更快
log、log10、log2、log1p 前三个分别为底数为e、10、2的对数,第四个为log(1+x)(自然对数,底数为e)
sign 计算正负号
ceil 计算ceiling值,即大于等于该值的最小整数
floor 计算floor值,即小于等于该值的最大整数
rint 四舍五入到整数,保留dtype
modf 将数组的小数和整数部分以两个独立数组的形式返回
isnan 返回一个表示“哪些是NaN”的布尔型数组
isfinite、isinf 返回一个表示“哪些是有穷(无穷)”的布尔型数组
二元ufunc函数 说明
add、subtract、multiply、divide 将数组中的元素相加/相减/相乘/相除
floor_divide 向下圆整除法(丢弃余数)
power 计算A^B
maximum/minimum/fmax/fmin 最大/最小值,后两个会忽略NaN
mod 取模
copysign 将第二个数组中的值的符号复制给第一个数组中的值
greater/greater_equal/less/less_equal/equal/not_equal >、>=、<、<=、==、!=,返回布尔型数组

利用数组进行数据处理

In [37]:
points = np.arange(0,10,1)
points
Out[37]:
array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
In [38]:
x, y = np.meshgrid(points,points)
In [39]:
x
Out[39]:
array([[0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9],
       [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]])
In [40]:
y
Out[40]:
array([[0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
       [1, 1, 1, 1, 1, 1, 1, 1, 1, 1],
       [2, 2, 2, 2, 2, 2, 2, 2, 2, 2],
       [3, 3, 3, 3, 3, 3, 3, 3, 3, 3],
       [4, 4, 4, 4, 4, 4, 4, 4, 4, 4],
       [5, 5, 5, 5, 5, 5, 5, 5, 5, 5],
       [6, 6, 6, 6, 6, 6, 6, 6, 6, 6],
       [7, 7, 7, 7, 7, 7, 7, 7, 7, 7],
       [8, 8, 8, 8, 8, 8, 8, 8, 8, 8],
       [9, 9, 9, 9, 9, 9, 9, 9, 9, 9]])
In [41]:
import matplotlib.pyplot as plt
z = np.sqrt(x**2 + y **2)
z
Out[41]:
array([[  0.        ,   1.        ,   2.        ,   3.        ,
          4.        ,   5.        ,   6.        ,   7.        ,
          8.        ,   9.        ],
       [  1.        ,   1.41421356,   2.23606798,   3.16227766,
          4.12310563,   5.09901951,   6.08276253,   7.07106781,
          8.06225775,   9.05538514],
       [  2.        ,   2.23606798,   2.82842712,   3.60555128,
          4.47213595,   5.38516481,   6.32455532,   7.28010989,
          8.24621125,   9.21954446],
       [  3.        ,   3.16227766,   3.60555128,   4.24264069,
          5.        ,   5.83095189,   6.70820393,   7.61577311,
          8.54400375,   9.48683298],
       [  4.        ,   4.12310563,   4.47213595,   5.        ,
          5.65685425,   6.40312424,   7.21110255,   8.06225775,
          8.94427191,   9.8488578 ],
       [  5.        ,   5.09901951,   5.38516481,   5.83095189,
          6.40312424,   7.07106781,   7.81024968,   8.60232527,
          9.43398113,  10.29563014],
       [  6.        ,   6.08276253,   6.32455532,   6.70820393,
          7.21110255,   7.81024968,   8.48528137,   9.21954446,
         10.        ,  10.81665383],
       [  7.        ,   7.07106781,   7.28010989,   7.61577311,
          8.06225775,   8.60232527,   9.21954446,   9.89949494,
         10.63014581,  11.40175425],
       [  8.        ,   8.06225775,   8.24621125,   8.54400375,
          8.94427191,   9.43398113,  10.        ,  10.63014581,
         11.3137085 ,  12.04159458],
       [  9.        ,   9.05538514,   9.21954446,   9.48683298,
          9.8488578 ,  10.29563014,  10.81665383,  11.40175425,
         12.04159458,  12.72792206]])
In [42]:
# Render the grid of distances as a grayscale image with a colour scale.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: "\s" is not a valid escape sequence in a normal string literal
# (modern Python warns about it). The raw form keeps the backslash, so the
# title text passed to matplotlib is unchanged.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")
plt.show()

将条件逻辑表述为数组运算

In [43]:
# Use linspace instead of arange with a float step: with a non-integer step
# the number of elements arange returns depends on floating-point rounding,
# while linspace always returns exactly the requested count, which must match
# the five entries of `cond`.
x = np.linspace(1.1, 1.5, 5)
y = np.linspace(2.1, 2.5, 5)
# Boolean mask with one flag per element of x and y.
cond = np.array([True, False, True, True, False])
x, y
Out[43]:
(array([ 1.1,  1.2,  1.3,  1.4,  1.5]), array([ 2.1,  2.2,  2.3,  2.4,  2.5]))

numpy.where是一个三元函数:

In [44]:
np.where(cond,x,y) # element-wise: take the value from x where cond is True, otherwise the value from y
Out[44]:
array([ 1.1,  2.2,  1.3,  1.4,  2.5])

数学和统计方法

In [45]:
arr = np.random.rand(5,4)
arr
Out[45]:
array([[ 0.29181426,  0.62212448,  0.07760222,  0.66713924],
       [ 0.24527186,  0.69692749,  0.42675127,  0.73714726],
       [ 0.36955407,  0.08503785,  0.58238958,  0.78733278],
       [ 0.74452781,  0.41448592,  0.90111088,  0.13783555],
       [ 0.23119486,  0.6491596 ,  0.38671888,  0.49402327]])
In [46]:
arr.mean()
Out[46]:
0.47740745546985747
In [47]:
np.mean(arr)
Out[47]:
0.47740745546985747
In [48]:
arr.sum()
Out[48]:
9.5481491093971496
In [49]:
arr.mean(axis=1)
Out[49]:
array([ 0.41467005,  0.52652447,  0.45607857,  0.54949004,  0.44027415])
In [50]:
arr = np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]])
In [51]:
arr
Out[51]:
array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])
In [52]:
arr.cumsum(0)
Out[52]:
array([[ 1,  2,  3],
       [ 5,  7,  9],
       [12, 15, 18]])
In [53]:
arr.cumsum(1)
Out[53]:
array([[ 1,  3,  6],
       [ 4,  9, 15],
       [ 7, 15, 24]])
In [54]:
arr.cumprod(0)
Out[54]:
array([[  1,   2,   3],
       [  4,  10,  18],
       [ 28,  80, 162]])
In [55]:
arr.argmax()
Out[55]:
8
In [56]:
arr.argmin()
Out[56]:
0

排序

In [57]:
arr = randn(8)
arr
Out[57]:
array([ 0.85140871, -0.52717307, -1.07794534,  0.05712297, -0.87596978,
       -0.98002288, -0.98001361, -0.89881679])
In [58]:
arr.sort()
arr
Out[58]:
array([-1.07794534, -0.98002288, -0.98001361, -0.89881679, -0.87596978,
       -0.52717307,  0.05712297,  0.85140871])
In [59]:
arr = randn(5,4)
arr
Out[59]:
array([[-1.42937646, -0.29155616, -0.17042937, -1.83786746],
       [ 1.1543149 ,  1.36851682,  0.07119453, -2.14286932],
       [-0.39755705, -0.31821724,  0.85320558, -1.26520982],
       [-0.6558445 , -1.58571312,  0.35574366,  0.53134701],
       [-0.46564628,  1.25648144,  0.59939394, -1.4013401 ]])
In [60]:
arr.sort(1)
arr
Out[60]:
array([[-1.83786746, -1.42937646, -0.29155616, -0.17042937],
       [-2.14286932,  0.07119453,  1.1543149 ,  1.36851682],
       [-1.26520982, -0.39755705, -0.31821724,  0.85320558],
       [-1.58571312, -0.6558445 ,  0.35574366,  0.53134701],
       [-1.4013401 , -0.46564628,  0.59939394,  1.25648144]])

唯一化以及其他的集合逻辑

In [61]:
ints = np.array([1,1,1,1,2,2,2,3,4,4,4,5,5])
np.unique(ints)
Out[61]:
array([1, 2, 3, 4, 5])
In [62]:
# Two overlapping integer sets used by the set-logic examples.
a = np.arange(1, 6)   # 1, 2, 3, 4, 5
b = a + 1             # 2, 3, 4, 5, 6
# Sorted values present in both arrays.
np.intersect1d(a, b)
Out[62]:
array([2, 3, 4, 5])
In [63]:
np.union1d(a,b)
Out[63]:
array([1, 2, 3, 4, 5, 6])
In [64]:
np.in1d(a,b)
Out[64]:
array([False,  True,  True,  True,  True], dtype=bool)
In [65]:
np.setdiff1d(a,b)
Out[65]:
array([1])
In [66]:
np.setxor1d(a,b)
Out[66]:
array([1, 6])

存取文本文件

  • np.loadtxt()
  • np.savetxt()

线性代数

In [67]:
np.dot(x,y)
Out[67]:
15.050000000000004
In [68]:
x.dot(y)
Out[68]:
15.050000000000004
In [69]:
from numpy.linalg import inv, qr

numpy及numpy.linalg中常用的线性代数函数(diag、dot、trace可直接通过numpy顶层命名空间调用):

函数 说明
diag 返回对角线上的元素(一维数组)
dot 矩阵乘法
trace 计算矩阵的迹
det 计算矩阵的行列式
eig 计算矩阵的特征值,特征向量
inv 计算矩阵的逆
pinv 计算矩阵的Moore-Penrose伪逆
qr 计算QR分解
svd 计算奇异值分解
solve 解线性方程Ax=b,其中A为一个方阵
lstsq 计算Ax=b的最小二乘解