In [1]:
import pandas as pd
import numpy as np
In [2]:
# Small example table: one row per animal (labelled by name), holding its
# class and top speed. The monkey's speed is deliberately missing (NaN).
df = pd.DataFrame(
    {
        'class': ['bird', 'bird', 'mammal', 'mammal'],
        'max_speed': [389.0, 24.0, 80.5, np.nan],
    },
    index=['falcon', 'parrot', 'lion', 'monkey'],
)
In [3]:
# Display the example frame (4 rows) so the masks below can be checked against it.
df
Out[3]:
class max_speed
falcon bird 389.0
parrot bird 24.0
lion mammal 80.5
monkey mammal NaN
In [4]:
# Combine two element-wise conditions into one boolean Series.
# `&` binds more tightly than `==` and `>`, so each comparison must be wrapped
# in its own parentheses.
(df['class'] == 'bird') & (df['max_speed'] > 100)  # the parentheses are required
Out[4]:
falcon     True
parrot    False
lion      False
monkey    False
dtype: bool
In [5]:
# Keep the combined condition in a variable; the method forms of `==` and `>`
# need no extra parentheses. The cell's value is the type of the mask.
mask = df['class'].eq('bird') & df['max_speed'].gt(100)
type(mask)
Out[5]:
pandas.core.series.Series
In [6]:
# A boolean NumPy array used as a mask. It carries no labels; in the recorded
# output the single True (third element) selects the third row, 'lion'.
mask2 = np.array([False, False, True, False])
df[mask2]
Out[6]:
class max_speed
lion mammal 80.5
In [7]:
# A boolean Series with the same labels as df.index but in a different order.
# In the recorded output the row selected is 'monkey' (the label paired with
# True), i.e. the mask is matched by label rather than by position, and pandas
# emits a UserWarning that the key will be reindexed.
mask3 = pd.Series([True, False, False, False], index=['monkey', 'parrot', 'falcon', 'lion'])  # note: the index order has been rearranged here
df[mask3]
/var/folders/x5/2xw2cfv976d81pb7x0fqttxh0000gn/T/ipykernel_52400/954345329.py:2: UserWarning: Boolean Series key will be reindexed to match DataFrame index.
  df[mask3]
Out[7]:
class max_speed
monkey mammal NaN
In [8]:
# A boolean Series whose labels are spelled out in the same order as df.index;
# the recorded output selects 'falcon' and shows no warning.
mask4 = pd.Series([True, False, False, False], index=['falcon', 'parrot', 'lion', 'monkey'])  # index is in the same order as df.index
df[mask4]
Out[8]:
class max_speed
falcon bird 389.0
In [9]:
# A plain Python list of booleans also works as a mask; in the recorded output
# the single True (first element) selects the first row, 'falcon'.
mask5 = [True, False, False, False]
df[mask5]
Out[9]:
class max_speed
falcon bird 389.0
In [10]:
# Build the mask with df.index itself as its index, so the labels match the
# frame without being retyped by hand.
mask6 = pd.Series([True, False, False, False], index=df.index)
df[mask6]
Out[10]:
class max_speed
falcon bird 389.0
In [11]:
# A mask whose labels do not match df.index: 'lion' appears twice and 'monkey'
# is absent. The lookup is expected to fail, so it is run inside try/except and
# the error is printed instead of leaving the failing line commented out.
mask7 = pd.Series([True, False, False, False], index=['falcon', 'parrot', 'lion', 'lion'])  # wrong index (an extra 'lion')
try:
    df[mask7]
except pd.errors.IndexingError as err:
    print(f'{type(err).__name__}: {err}')

# Expected output:
# IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).
In [12]:
# A mask that is one label short: df.index contains 'monkey' but the mask does
# not. The lookup is expected to fail, so it is run inside try/except and the
# error is printed instead of leaving the failing line commented out.
mask8 = pd.Series([True, False, False], index=['falcon', 'parrot', 'lion'])  # wrong index ('monkey' is missing)
try:
    df[mask8]
except pd.errors.IndexingError as err:
    print(f'{type(err).__name__}: {err}')

# Expected output:
# IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).
In [13]:
# A boolean Series created without an explicit index; the recorded output shows
# it labelled 0, 1, 2, 3.
mask9 = pd.Series([True, False, False, False])  # no index specified
# mask9 itself can be created without any problem
mask9
Out[13]:
0     True
1    False
2    False
3    False
dtype: bool
In [14]:
# ...but mask9 cannot be used to filter df: its labels are the default integers
# 0/1/2/3, none of which appear in df.index. The lookup is run inside
# try/except so the error is shown rather than left as commented-out code.
try:
    df[mask9]
except pd.errors.IndexingError as err:
    print(f'{type(err).__name__}: {err}')

# Expected output:
# IndexingError: Unalignable boolean Series provided as indexer (index of the boolean Series and of the indexed object do not match).
In [ ]: