NumPy ndarray 对象的创建与使用

NumPy 的 ndarray 是科学计算的核心数据结构。以下是创建和使用 ndarray 的详细指南：

一、创建数组的基本方法

1. 从 Python 列表/元组创建

import numpy as np

# Plain Python sequences used as input data.
flat_values = [1, 2, 3, 4, 5]
nested_rows = [[1, 2, 3], [4, 5, 6]]
tuple_values = (1, 2, 3)

# A flat list becomes a one-dimensional array.
arr1 = np.array(flat_values)

# A list of equal-length lists becomes a two-dimensional array
# (one row per inner list).
arr2 = np.array(nested_rows)

# A tuple is converted the same way as a list.
arr3 = np.array(tuple_values)

2. 使用内置函数创建特殊数组

# Arrays filled with zeros.
zeros = np.zeros(shape=(3, 4))    # 3 rows x 4 columns of 0.0
zeros_like = np.zeros_like(arr2)  # zeros with the same shape as arr2

# Array filled with ones.
ones = np.ones(shape=(2, 3))      # 2 rows x 3 columns of 1.0

# Identity matrices (ones on the main diagonal, zeros elsewhere).
eye = np.eye(N=3)                 # 3x3
identity = np.identity(n=4)       # 4x4

# Uninitialized array: the memory is allocated but not filled, so the
# contents are arbitrary until assigned.
empty = np.empty(shape=(2, 2))

# Square matrix with the given values on its main diagonal.
diag = np.diag(v=[1, 2, 3, 4])

3. 创建数值序列

# Arithmetic progression: start at 0, stop before 10, step 2.
arange = np.arange(start=0, stop=10, step=2)     # [0, 2, 4, 6, 8]

# Five evenly spaced points from 0 to 1, both endpoints included.
linspace = np.linspace(start=0, stop=1, num=5)   # [0.  , 0.25, 0.5 , 0.75, 1.  ]

# Geometric progression: 10**0 .. 10**2 at five evenly spaced exponents.
logspace = np.logspace(start=0, stop=2, num=5)   # [1., 3.16, 10., 31.62, 100.]

4. 随机数组

# Use the Generator API. NumPy recommends np.random.default_rng() for new
# code over the legacy module-level functions (rand, randn, randint,
# random_sample), which rely on hidden global state.
# Pass an integer seed to default_rng() when reproducible output is needed.
rng = np.random.default_rng()

# Uniform floats in the half-open interval [0, 1), shape (3, 3).
rand_uniform = rng.random((3, 3))

# Standard normal samples (mean 0, standard deviation 1), shape (2, 4).
rand_normal = rng.standard_normal((2, 4))

# Random integers from 0 (inclusive) to 10 (exclusive), shape (3, 3).
rand_int = rng.integers(0, 10, size=(3, 3))

# Another batch of uniform floats in [0, 1), shape (2, 2).
random_sample = rng.random((2, 2))

二、数组属性

# Pin the dtype so the values shown in the comments below hold everywhere:
# without it the default integer type (and therefore dtype, itemsize and
# nbytes) depends on the platform and NumPy version.
arr = np.array([[1, 2, 3], [4, 5, 6]], dtype=np.int64)

print("形状:", arr.shape)        # (2, 3)  length of each axis
print("维度:", arr.ndim)         # 2       number of axes
print("元素总数:", arr.size)     # 6       total number of elements
print("数据类型:", arr.dtype)    # int64   element type
print("元素字节大小:", arr.itemsize)  # 8   bytes per element
print("总字节大小:", arr.nbytes) # 48      size * itemsize

三、数据类型控制

# Shared input values; only the requested dtype differs between the arrays.
values = [1, 2, 3]

# Choose the element type at creation time via the dtype argument.
arr_int32 = np.array(values, dtype=np.int32)
arr_float64 = np.array(values, dtype=np.float64)
arr_complex = np.array(values, dtype=np.complex128)

# astype returns a new array converted to the requested element type.
arr_float = arr_int32.astype(dtype=np.float32)

四、数组操作

1. 重塑形状

# Twelve consecutive integers 0..11 to reshape in the examples below.
arr = np.arange(12)

# reshape returns a new array object with the requested shape;
# the shape of arr itself is left untouched.
reshaped = arr.reshape((3, 4))      # 3 rows x 4 columns
reshaped = arr.reshape((2, 2, 3))   # three-dimensional

# resize changes the shape of arr itself, in place.
arr.resize((3, 4))

# flatten and ravel both collapse the array to one dimension.
flattened = arr.flatten()           # always a copy
raveled = arr.ravel()               # a view when possible (as here)

2. 索引和切片

# 3x4 example matrix holding the integers 1..12 row by row.
rows = [
    [1, 2, 3, 4],
    [5, 6, 7, 8],
    [9, 10, 11, 12],
]
arr = np.array(rows)

# Basic indexing
first_row = arr[0]
print(first_row)        # first row: [1 2 3 4]
single_item = arr[1, 2]
print(single_item)      # second row, third column: 7
second_column = arr[:, 1]
print(second_column)    # second column: [2 6 10]

# Slicing
top_middle = arr[:2, 1:3]
print(top_middle)       # rows 0-1, columns 1-2
every_other = arr[::2, ::2]
print(every_other)      # every second row and every second column

# Boolean indexing: the mask is True wherever the element exceeds 5.
mask = arr > 5
selected = arr[mask]
print(selected)         # all elements greater than 5

# Fancy indexing: select by explicit lists of indices.
row_picks = arr[[0, 2]]
print(row_picks)        # rows 0 and 2
column_picks = arr[:, [0, 3]]
print(column_picks)     # columns 0 and 3

3. 数组拼接

# a is 2x2, b is a single 1x2 row.
a = np.array([[1, 2], [3, 4]])
b = np.array([[5, 6]])

# Vertical stacking: b is appended below a as an extra row.
vstack = np.vstack([a, b])        # [[1 2], [3 4], [5 6]]

# Horizontal stacking: the transpose of a is placed to the right of a.
a_transposed = a.transpose()
hstack = np.hstack([a, a_transposed])   # [[1 2 1 3], [3 4 2 4]]

# concatenate joins along an explicitly chosen axis (axis 0 = rows).
concat = np.concatenate([a, b], axis=0)

4. 数组分割

# 3x4 matrix holding 0..11 row by row.
arr = np.arange(12).reshape((3, 4))

# Horizontal split: cut the columns into 2 equal pieces.
hsplits = np.hsplit(arr, indices_or_sections=2)

# Vertical split: cut the rows into 3 equal pieces.
vsplits = np.vsplit(arr, indices_or_sections=3)

# General split along a chosen axis (axis 1 = columns).
splits = np.split(arr, indices_or_sections=2, axis=1)

五、实用技巧

1. 视图 vs 副本

arr = np.array([1, 2, 3, 4, 5])
middle = slice(1, 4)     # selects indices 1, 2 and 3

# View: basic slicing shares memory with the source array.
view = arr[middle]
view[0] = 99             # also overwrites arr[1]

# Copy: .copy() allocates independent storage.
copy = arr[middle].copy()
copy[0] = 88             # arr is left untouched

2. 广播机制

# Arithmetic between arrays of different shapes: (2, 3) and (3,).
a = np.array([[1, 2, 3], [4, 5, 6]])
b = np.array([10, 20, 30])

# b is broadcast across each row of a before the element-wise addition.
result = np.add(a, b)
# [[11 22 33]
#  [14 25 36]]

3. 向量化运算

# Prefer vectorized operations over explicit Python loops.
# The dtype is pinned to int64 because the largest square (999999 ** 2) does
# not fit in 32 bits; on platforms where the default integer is 32-bit the
# result would otherwise overflow silently.
arr = np.arange(1000000, dtype=np.int64)

# Slow - element-by-element Python loop (shown for comparison only):
# result = [x**2 for x in arr]

# Fast - a single vectorized NumPy expression.
result = arr ** 2

六、高级创建方法

# Build an array by evaluating a function over index coordinates.
def func(i, j):
    """Return i + j (used below as the per-coordinate generator)."""
    total = i + j
    return total

# np.fromfunction evaluates func over the coordinates of a 3x3 grid;
# dtype=float (the default) is spelled out, hence the float results below.
arr_func = np.fromfunction(func, (3, 3), dtype=float)
# [[0. 1. 2.]
#  [1. 2. 3.]
#  [2. 3. 4.]]

# Coordinate grids: x varies along columns, y varies along rows.
axis = np.arange(3)
x, y = np.meshgrid(axis, axis)

# Repeating data
base = [1, 2, 3]
repeated = np.repeat(base, repeats=3)  # each element 3 times: [1 1 1 2 2 2 3 3 3]
tiled = np.tile(base, reps=3)          # whole sequence 3 times: [1 2 3 1 2 3 1 2 3]