可以将文章内容翻译成中文,广告屏蔽插件可能会导致该功能失效(如失效，请关闭广告屏蔽插件后再试):

问题:

I am trying to perform a groupby filter that is very similar to the example in this documentation: pandas groupby filter

>>> df = pd.DataFrame({'A' : ['foo', 'bar', 'foo', 'bar',
...                           'foo', 'bar'],
...                    'B' : [1, 2, 3, 4, 5, 6],
...                    'C' : [2.0, 5., 8., 1., 2., 9.]})
>>> grouped = df.groupby('A')
>>> grouped.filter(lambda x: x['B'].mean() > 3.)
     A  B    C
1  bar  2  5.0
3  bar  4  1.0
5  bar  6  9.0

I am trying to return a DataFrame that has all 3 columns, but only 2 rows. Those 2 rows contain the minimum values of column B, after grouping by column A. I tried the following line of code:

grouped.filter(lambda x: x['B'] == x['B'].min())

But this doesn't work, and I get this error: TypeError: filter function returned a Series, but expected a scalar bool

The DataFrame I am trying to return should look like this:

    A   B   C
0  foo  1  2.0
1  bar  2  5.0

I would appreciate any help you can provide. Thank you, in advance, for your help.

回答1:

>>> # sort=False to return the rows in the order they originally occurred
>>> df.loc[df.groupby("A", sort=False)["B"].idxmin()]

     A  B    C
0  foo  1  2.0
1  bar  2  5.0

回答2:

No need groupby :-)

df.sort_values('B').drop_duplicates('A')
Out[288]: 
     A  B    C
0  foo  1  2.0
1  bar  2  5.0

回答3:

There's a fundamental difference: In the documentation example, there is a single Boolean value per group. That is, you return the entire group if the mean is greater than 3. In your example, you want to filter specific rows within a group.

For your task the usual trick is to sort values and use .head or .tail to filter to the row with the smallest or largest value respectively:

df.sort_values('B').groupby('A').head(1)

#     A  B    C
#0  foo  1  2.0
#1  bar  2  5.0

For more complicated queries you can use .transform or .apply to create a Boolean Series to slice. Also in this case safer if multiple rows share the minimum and you need all of them:

df[df.groupby('A').B.transform(lambda x: x == x.min())]

#     A  B    C
#0  foo  1  2.0
#1  bar  2  5.0

回答4:

The short answer:

grouped.apply(lambda x: x[x['B'] == x['B']].min())

... and the longer one:

Your grouped object has 2 groups:

In[25]: for df in grouped:
   ...:     print(df)
   ...:     
('bar',      
     A  B    C
1  bar  2  5.0
3  bar  4  1.0
5  bar  6  9.0)

('foo',      
     A  B    C
0  foo  1  2.0
2  foo  3  8.0
4  foo  5  2.0)

filter() method for GroupBy object is for filtering groups as entities, NOT for filtering their individual rows. So using the filter() method, you may obtain only 4 results:

an empty DataFrame (0 rows),
rows of the group 'bar' (3 rows),
rows of the group 'foo' (3 rows),
rows of both groups (6 rows)

Nothing else, regardless of the used parameter (boolean function) in the filter() method.

So you have to use some other method. An appropriate one is the very flexible apply() method, which lets you apply an arbitrary function which

takes a DataFrame (a group of GroupBy object) as its only parameter,
returns either a Pandas object or a scalar.

In your case that function should return (for every of your 2 groups) the 1-row DataFrame having the minimal value in the column 'B', so we will use the Boolean mask

group['B'] == group['B'].min()

for selecting such a row (or - maybe - more rows):

In[26]: def select_min_b(group):
   ...:     return group[group['B'] == group['B'].min()]

Now using this function as a parameter of the apply() method of GroupBy object grouped we will obtain

In[27]: grouped.apply(select_min_b)
Out[27]: 
         A  B    C
A                 
bar 1  bar  2  5.0
foo 0  foo  1  2.0

Note:

The same, but as only one command (using the lambda function):

grouped.apply(lambda group: group[group['B'] == group['B']].min())

回答5:

df.groupby('A').apply(lambda x: x.loc[x['B'].idxmin(), ['B','C']]).reset_index()

Python 3 pandas.groupby.filter

问题:

回答1:

回答2:

回答3:

回答4:

回答5:

收藏的人(0)

Python 3 pandas.groupby.filter

问题:

回答1:

回答2:

回答3:

回答4:

回答5:

收藏的人(0)

举报内容

检举类型

检举原因

检举说明(必填)

打开微信“扫一扫”，打开网页后点击屏幕右上角分享按钮