In [1]:
import pandas as pd
import numpy as np
In [2]:
# Small demo frame: one row per animal (the index labels), holding its class
# and its top speed. The monkey's speed is missing (NaN).
df = pd.DataFrame(
    {
        'class': ['bird', 'bird', 'mammal', 'mammal'],
        'max_speed': [389.0, 24.0, 80.5, np.nan],
    },
    index=['falcon', 'parrot', 'lion', 'monkey'],
)
In [3]:
# Display the whole frame (only four rows) as a reference for the mask examples.
df
Out[3]:
class | max_speed | |
---|---|---|
falcon | bird | 389.0 |
parrot | bird | 24.0 |
lion | mammal | 80.5 |
monkey | mammal | NaN |
In [4]:
# Element-wise AND of two conditions; the result is a boolean Series labelled like df.
(df['class'] == 'bird') & (df['max_speed'] > 100) # the parentheses are required: & binds more tightly than == and >
Out[4]:
falcon True parrot False lion False monkey False dtype: bool
In [5]:
# Keep the combined condition under a name and confirm what kind of object it
# is: the element-wise AND of two boolean Series is itself a pandas Series.
mask = df['class'].eq('bird') & df['max_speed'].gt(100)
type(mask)
Out[5]:
pandas.core.series.Series
In [6]:
# A plain NumPy boolean array carries no labels, so rows are picked by position:
# the single True at position 2 selects the third row ('lion').
mask2 = np.array([False, False, True, False])
df[mask2]
Out[6]:
class | max_speed | |
---|---|---|
lion | mammal | 80.5 |
In [7]:
mask3 = pd.Series([True, False, False, False], index=['monkey', 'parrot', 'falcon', 'lion']) # note: the index labels are in a different order than df.index
# A boolean Series is matched to df by label, not by position: the True is
# labelled 'monkey', so the 'monkey' row is returned. pandas emits a UserWarning
# saying the key will be reindexed to match the DataFrame index.
df[mask3]
/var/folders/x5/2xw2cfv976d81pb7x0fqttxh0000gn/T/ipykernel_52400/954345329.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index. df[mask3]
Out[7]:
class | max_speed | |
---|---|---|
monkey | mammal | NaN |
In [8]:
mask4 = pd.Series([True, False, False, False], index=['falcon', 'parrot', 'lion', 'monkey']) # index labels are in the same order as df.index
# The True is labelled 'falcon', so the 'falcon' row is selected.
df[mask4]
Out[8]:
class | max_speed | |
---|---|---|
falcon | bird | 389.0 |
In [9]:
# A plain Python list of booleans is accepted as well; like the NumPy array it
# has no labels, and the True in first position selects the first row ('falcon').
mask5 = [True, False, False, False]
df[mask5]
Out[9]:
class | max_speed | |
---|---|---|
falcon | bird | 389.0 |
In [10]:
# Reusing df.index as the mask's index gives the mask exactly the same labels
# as df, so the True lands on the first label ('falcon').
mask6 = pd.Series([True, False, False, False], index=df.index)
df[mask6]
Out[10]:
class | max_speed | |
---|---|---|
falcon | bird | 389.0 |
In [11]:
mask7 = pd.Series([True, False, False, False], index=['falcon', 'parrot', 'lion', 'lion'])  # invalid index: 'lion' appears twice (and 'monkey' is absent)
# The mask's labels cannot be matched to df.index, so the lookup raises
# IndexingError. The expected error is caught and printed, so the cell actually
# demonstrates the failure while the notebook still runs top to bottom.
try:
    df[mask7]
except pd.errors.IndexingError as exc:
    print(f"IndexingError: {exc}")
In [12]:
mask8 = pd.Series([True, False, False], index=['falcon', 'parrot', 'lion'])  # invalid index: 'monkey' is missing
# df has a 'monkey' row that the mask says nothing about, so the mask cannot be
# aligned and the lookup raises IndexingError. The expected error is caught and
# printed, so the cell demonstrates the failure without stopping the notebook.
try:
    df[mask8]
except pd.errors.IndexingError as exc:
    print(f"IndexingError: {exc}")
In [13]:
mask9 = pd.Series([True, False, False, False]) # no index specified, so the Series gets the default integer index 0..3
# Creating mask9 on its own works fine
mask9
Out[13]:
0 True 1 False 2 False 3 False dtype: bool
In [14]:
# mask9 cannot be used to filter df, because its index is the default integers
# 0/1/2/3, which share no labels with df.index. The lookup raises IndexingError;
# the expected error is caught and printed so the cell shows the failure without
# stopping the notebook.
try:
    df[mask9]
except pd.errors.IndexingError as exc:
    print(f"IndexingError: {exc}")
In [ ]: