语言基础
Python
Pandas

Series 和 DataFrame 增删改查

import numpy as np
import pandas as pd
from pandas import Series
from pandas import DataFrame

1
2
3
4

# Series

# 查

data = [1,2,3]
index = ['a','b','c']
s = Series(data=data, index=index)
s

1
2
3
4

a    1
b    2
c    3
dtype: int64

# 使用 [ ] 快捷查看

s['b'] # scalar, 返回一个值

s[0:2] # 范围，左闭右开，返回Series切片

a    1
b    2
dtype: int64

s['a':'c'] # 注意，利用标签切片的时候左右都是闭区间

a    1
b    2
c    3
dtype: int64

s[[0,2]] #列表，返回Series切片

a    1
c    3
dtype: int64

s[['a','c']]

a    1
c    3
dtype: int64

mask = [False, True, False]  #mask，类似于列表，只是长度必须和Series相同，返回Series切片
s[mask]

1
2

b    2
dtype: int64

# `.loc[]`基于索引查看

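.loc[] 只按标签（或布尔 mask）查询：单标签、标签切片、标签列表、mask 这四种写法与 [] 的结果一致；但 [] 支持的整数位置写法（如 s[0:2]、s[[0,2]]）不适用于 .loc[]，按位置查询请使用 .iloc[]。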

s.loc['b'] # 单索引，返回一个值
s['b']

1
2

2

2

s.loc['a':'c'] # 范围，注意：左闭右闭，返回Series切片
s[0:3]

1
2

a    1
b    2
c    3
dtype: int64

a    1
b    2
c    3
dtype: int64

s.loc[['a','c']] # 列表，返回Series切片

a    1
c    3
dtype: int64

mask = [True, True, False] # mask，和iloc[]效果等同，返回Series切片
s.loc[mask]

1
2

a    1
b    2
dtype: int64

# `.iloc[]`基于位置查看

无视索引，只按照位置定位。

s.iloc[1] # scalar, 返回一个值
s[1]

1
2

2

2

s.iloc[0:2] # 范围，左闭右开，返回Series切片

a    1
b    2
dtype: int64

s.iloc[[0, 2]] #列表，返回Series切片

a    1
c    3
dtype: int64

mask = [False, True, False]  #mask，类似于列表，只是长度必须和Series相同，返回Series切片
s.iloc[mask]

1
2

b    2
dtype: int64

# 改

# 改值

s1 = s.copy()  # 深copy，拷贝数据结构包含的所有信息

s1['a'] = 10
s1['b'] = 10
s1

1
2
3

a    10
b    10
c     3
dtype: int64

s1[0:2] = 10
s1

1
2

a    10
b    10
c     3
dtype: int64

函数修改：Series.replace(to_replace=None, value=None, inplace=False)

to_replace：要修改的值，可以为列表
value：改为的值，可以为列表，与 to_replace 要匹配；
inplace：是否在原地修改；

s1.replace(to_replace = 10, value = 100, inplace=False)

a    100
b    100
c      3
dtype: int64

# 改索引

直接给 index 整体赋值；Index 对象不可变（类似于 tuple），只能整体替换成新的索引，不能对其中某个元素或切片单独赋值修改

s1 = s.copy()
s1.index = ['a','e','f']
s1

1
2
3

a    1
e    2
f    3
dtype: int64

函数修改：Series.rename(index=None, level = None, inplace = False)

index：dict 或函数，dict 可以只修改部分索引；注意传入标量等非映射值时修改的是 Series 的 name 而不是索引，若要用 list 整体替换索引（长度必须和已有索引相同），请直接给 s.index 赋值；
level：多重索引时，可以指定修改哪一重，从 0 开始递增；
inplace：是否原地修改。

s1.rename(index={'e':'b'}, inplace=False)

a    1
b    2
f    3
dtype: int64

# 增

# 直接增一行

s1 = s.copy()
s1['d'] = 4
s1

1
2
3

a    1
b    2
c    3
d    4
dtype: int64

# 函数增多行

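Series.append(to_append, ignore_index=False, verify_integrity=False)（注意：该方法自 pandas 1.4 起被弃用，并在 pandas 2.0 中移除；新版本请改用 pd.concat([s, s1])，它同样支持 ignore_index 和 verify_integrity 参数。）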

to_append: 另一个 series 或多个 Series 构成的列表；
ignore_index：False-保留原有索引，True-清除所有索引，生成默认数值索引；
verify_integrity：True 的情况下，如果 to_append 索引与当前索引有重复，则报错。

s1 = pd.Series([22,33], index = ['a', 'g'])
s.append(s1, ignore_index=False)

1
2

a     1
b     2
c     3
a    22
g    33
dtype: int64

# 删

# 直接删一行

s = Series(np.arange(4), index=['a', 'b', 'c', 'd'])
s2 = s.drop('c')
s2

1
2
3

a    0
b    1
d    3
dtype: int64

# 函数删多行

Series.drop(labels, level=None, inplace=False)

labels：索引，单索引或索引的列表；
level：多重索引需要设置；
inplace：是否原地修改。

s1 = s.copy()
s1.drop(['a','c'])

1
2

b    1
d    3
dtype: int64

# DataFrame

data = [[1,2,3],
        [4,5,6]]
index = ['a','b']
columns = ['A','B','C']
df = pd.DataFrame(data=data, index=index, columns=columns)
df

1
2
3
4
5
6

	A	B	C
a	1	2	3
b	4	5	6

# 查

# `[]`快捷查看

[] 属于快捷查看方式，只包含下面四种，两种列操作、两种行操作。

# 索引是列操作，切片是行操作，一维布尔索引是行操作

df['A'] # 列操作，单列索引，返回Series。相当于 df.A。

1
2
3

a    1
b    4
Name: A, dtype: int64

df[['A','C']] # 列操作，列索引列表，返回DataFrame

	A	C
a	1	3
b	4	6

# df[0]  # 报错

df[:1]  # 行操作，位置范围，返回DataFrame

	A	B	C
a	1	2	3

# df[[0,1]] #报错

mask = [False, True]
df[mask] # 行操作，mask，必须和行长度一致，返回DataFrame

1
2

	A	B	C
b	4	5	6

df['a':'b']

	A	B	C
a	1	2	3
b	4	5	6

df[df>5]

	A	B	C
a	NaN	NaN	NaN
b	NaN	NaN	6.0

# `.loc[]`基于索引

.loc[]在 DataFrame 中与[]不一致。

DataFrame 有两维，每一维都和 Series 的 .loc[] 用法相同；
Series 有四种方式，所以 DataFrame 有16 种方式;
可以缺省后面维度，默认补全为 ':' 。

下面都以第一维度为例，第二维可以类比。

df.loc['b','B'] # 返回单一值，因为两维都是单索引

df.loc['a':'b', 'A'] #返回Series，如果只有一维是单索引

a    1
b    4
Name: A, dtype: int64

df.loc[['a','b'], 'B'] #返回Series，如果只有一维是单索引

a    2
b    5
Name: B, dtype: int64

mask1 = [True, True]  # mask 长度必须和行数相同（df 只有两行）
df.loc[mask1, 'B']

1
2

a    2
b    5
Name: B, dtype: int64

# `.iloc[]`基于位置

无视索引，只按照位置定位。

DataFrame 有两维，每一维都和 Series 的 .iloc[] 用法相同；
Series 有四种方式，所以 DataFrame 有16种方式；
可以缺省后面维度，默认补全为 ':' 。

下面都以第一维度为例，第二维可以类比。

df.iloc[1, 1] # 返回单一值，因为两维都是scalar

df.iloc[0:2, 0]  # 返回Series，如果只有一维是scalar

a    1
b    4
Name: A, dtype: int64

df.iloc[[0,1], [0,2]] # 返回DataFrame

	A	C
a	1	3
b	4	6

mask1 = [False, True, False]  # 返回DataFrame
mask2 = [True, False]
df.iloc[mask2, mask1]

1
2
3

	B
a	2

# 改

# 改值

直接在查的基础上赋值进行修改。使用 .loc[] 可以确保是在原对象上修改；如果用链式索引（例如 df1['A']['a'] = 10）赋值，可能改到的是临时副本，并触发 SettingWithCopyWarning。

df1 = df.copy()
df1.loc['a', 'A'] = 10
df1

1
2
3

	A	B	C
a	10	2	3
b	4	5	6

函数批量任意修改：DataFrame.replace(to_replace=None, value=None, inplace=False)

to_replace：要修改的值，可以为列表
value：改为的值，可以为列表，与 to_replace 要匹配；
inplace：是否在原地修改；

df1.replace(to_replace=10, value=100, inplace=False)

	A	B	C
a	100	2	3
b	4	5	6

df1[['A','B']] = df1[['B','A']]  #交换两列
df1

1
2

	A	B	C
a	2	10	3
b	5	4	6

# 改索引

直接在索引上改，索引类似于 tuple，必须全改，不能切片修改

df1 = df.copy()
df1.index = ['e', 'f']
df1.columns = ['E', 'F', 'G']
df1

1
2
3
4

	E	F	G
e	1	2	3
f	4	5	6

函数修改：DataFrame.rename(index=None, columns = None, level = None, inplace = False)

index：dict 或函数，dict 时可以部分修改；不接受 list，若要用 list 整体替换（长度必须相同），请直接给 df.index 赋值；
columns：dict 或函数，dict 时可以部分修改；不接受 list，若要用 list 整体替换（长度必须相同），请直接给 df.columns 赋值；
level：多重索引时，可以指定修改哪一重，目前还用不着；
inplace：是否原地修改。

df1.rename(index = {'e':'b'}, columns = {'E':'A'}, inplace = False)

	A	F	G
b	1	2	3
f	4	5	6

# 增

# 直接增一行

df1 = df.copy()
df1.loc['c'] = [7,8,9]
df1

1
2
3

	A	B	C
a	1	2	3
b	4	5	6
c	7	8	9

# 函数增多行

使用pd.concat(objs, axis=0)函数。

确保列索引相同，行增加。（其实这个函数并不要求列索引相同：列索引不同时，默认取并集并用 NaN 填充，也可以用 join='inner' 只保留相同的列。而我写这个教程遵循了《Python 之禅》的精神——明确优于隐晦，做好一件事应该有一种最好的方法；精确控制每一步，可以少犯错。）

objs: list of DataFrame；
axis: 取 0，进行行增加操作。

df1 = DataFrame([[22,33,44],[55,66,77]], index = ['c','d'],columns = ['A','B','C'])
pd.concat([df, df1], axis=0 )

1
2

	A	B	C
a	1	2	3
b	4	5	6
c	22	33	44
d	55	66	77

# 直接增一列

df1 = df.copy()
df1['H'] = [7,8]
df1

1
2
3

	A	B	C	H
a	1	2	3	7
b	4	5	6	8

# 函数增多列

pd.concat(objs, axis=1)，确保行索引相同，列增加。

objs: list of DataFrame；
axis: 取 1，进行列增加操作。

df1 = pd.DataFrame([[22,33],[44,55]], index = ['a','b'],columns = ['D','E'])
pd.concat([df,df1], axis=1)

1
2

	A	B	C	D	E
a	1	2	3	22	33
b	4	5	6	44	55

# 删

# 函数删多行

DataFrame.drop(labels, axis = 0, level=None, inplace=False)：

labels：索引，单索引或索引的列表；
axis：0-删行；
level：多重索引需要指定；
inplace：是否原地修改。

df1 = df.copy()
df1.drop(['a'], axis=0)

1
2

	A	B	C
b	4	5	6

# 直接删一列

df1 = df.copy()
del df1['A']
df1

1
2
3

	B	C
a	2	3
b	5	6

# 函数删多列

DataFrame.drop(labels, axis = 1, level=None, inplace=False)：

labels：索引，单索引或索引的列表；
axis：1-删列；
level：多重索引需要指定；
inplace：是否原地修改。

df1 = df.copy()
df1.drop(['A','C'], axis=1) # axis=1 或 ‘columns’

1
2

	B
a	2
b	5

上次更新: 2023/11/01, 03:11:44

← 认识 Series 和 DataFrame Index对象增删改查→

Series 和 DataFrame 增删改查

# Series

# 查

# 使用 [ ] 快捷查看

# .loc[]基于索引查看

# .iloc[]基于位置查看

# 改

# 改值

# 改索引

# 增

# 直接增一行

# 函数增多行

# 删

# 直接删一行

# 函数删多行

# DataFrame

# 查

# []快捷查看

# .loc[]基于索引

# .iloc[]基于位置

# 改

# 改值

# 改索引

# 增

# 直接增一行

# 函数增多行

# 直接增一列

# 函数增多列

# 删

# 函数删多行

# 直接删一列

# 函数删多列

# `.loc[]`基于索引查看

# `.iloc[]`基于位置查看

# `[]`快捷查看

# `.loc[]`基于索引

# `.iloc[]`基于位置