Add new column to data.frame based on rows grouped

2019-06-14 18:46发布

df = read.table(text = 'ID  Day Count   Count_sum
33021   9535    3   29
33029   9535    3   29
34001   9535    3   29
32010   9534    2   29
33023   9534    2   29
45012   9533    4   29
47001   9533    4   29
48010   9533    4   29
50001   9533    4   29
49004   9532    1   29
9002    9531    2   29
67008   9531    2   29
40011   9530    1   29
42003   9529    2   29
42011   9529    2   29
55023   9528    1   29
40012   9527    3   29
43007   9527    3   29
47011   9527    3   29
52004   9526    4   29
52005   9526    4   29
52006   9526    4   29
52007   9526    4   29
19001   9525    1   29
57008   9524    5   29
57010   9524    5   29
58006   9524    5   29
58008   9524    5   29
59001   9524    5   29
58008   9537    3   27
66001   9537    3   27
68001   9537    3   27
54057   9536    1   27
33021   9535    3   27
33029   9535    3   27
34001   9535    3   27
32010   9534    2   27
33023   9534    2   27
32010   9534    2   27
33023   9534    2   27
45012   9533    4   27
47001   9533    4   27
48010   9533    4   27
50001   9533    4   27
45012   9533    4   27
47001   9533    4   27
48010   9533    4   27
50001   9533    4   27
49004   9532    1   27
49004   9532    1   27
9002    9531    2   27
67008   9531    2   27
9002    9531    2   27
67008   9531    2   27
40011   9530    1   27
40011   9530    1   27
42003   9529    2   27
42011   9529    2   27
42003   9529    2   27
42011   9529    2   27
55023   9528    1   27
55023   9528    1   27
40012   9527    3   27
43007   9527    3   27
47011   9527    3   27
40012   9527    3   27
43007   9527    3   27
47011   9527    3   27
52004   9526    4   27
52005   9526    4   27
52006   9526    4   27
52007   9526    4   27
52004   9526    4   27
52005   9526    4   27
52006   9526    4   27
52007   9526    4   27
19001   9525    1   27
57008   9524    5   27
57010   9524    5   27
58006   9524    5   27
58008   9524    5   27
59001   9524    5   27
65004   9523    1   27
49004   9532    1   26
9002    9531    2   26
67008   9531    2   26
40011   9530    1   26
42003   9529    2   26
42011   9529    2   26
55023   9528    1   26
40012   9527    3   26
43007   9527    3   26
47011   9527    3   26
52004   9526    4   26
52005   9526    4   26
52006   9526    4   26
52007   9526    4   26
19001   9525    1   26
57008   9524    5   26
57010   9524    5   26
58006   9524    5   26
58008   9524    5   26
59001   9524    5   26
65004   9523    1   26
75003   9522    1   26
76007   9521    4   26
77002   9521    4   26
77003   9521    4   26
78003   9521    4   26
48007   9538    2   25
48011   9538    2   25
58008   9537    3   25
66001   9537    3   25
68001   9537    3   25
54057   9536    1   25
54057   9536    1   25
33021   9535    3   25
33029   9535    3   25
34001   9535    3   25
33021   9535    3   25
33029   9535    3   25
34001   9535    3   25
32010   9534    2   25
33023   9534    2   25
32010   9534    2   25
33023   9534    2   25
45012   9533    4   25
47001   9533    4   25
48010   9533    4   25
50001   9533    4   25
45012   9533    4   25
47001   9533    4   25
48010   9533    4   25
50001   9533    4   25
45012   9533    4   25
47001   9533    4   25
48010   9533    4   25
50001   9533    4   25
49004   9532    1   25
49004   9532    1   25
49004   9532    1   25
9002    9531    2   25
67008   9531    2   25
9002    9531    2   25
67008   9531    2   25
9002    9531    2   25
67008   9531    2   25
9002    9531    2   25
67008   9531    2   25
40011   9530    1   25
40011   9530    1   25
40011   9530    1   25
40011   9530    1   25
42003   9529    2   25
42011   9529    2   25
42003   9529    2   25
42011   9529    2   25
42003   9529    2   25
42011   9529    2   25
42003   9529    2   25
42011   9529    2   25
55023   9528    1   25
55023   9528    1   25
55023   9528    1   25
55023   9528    1   25
40012   9527    3   25
43007   9527    3   25
47011   9527    3   25
40012   9527    3   25
43007   9527    3   25
47011   9527    3   25
40012   9527    3   25
43007   9527    3   25
47011   9527    3   25
40012   9527    3   25
43007   9527    3   25
47011   9527    3   25
52004   9526    4   25
52005   9526    4   25
52006   9526    4   25
52007   9526    4   25
52004   9526    4   25
52005   9526    4   25
52006   9526    4   25
52007   9526    4   25
52004   9526    4   25
52005   9526    4   25
52006   9526    4   25
52007   9526    4   25
19001   9525    1   25
19001   9525    1   25
19001   9525    1   25
57008   9524    5   25
57010   9524    5   25
58006   9524    5   25
58008   9524    5   25
59001   9524    5   25
57008   9524    5   25
57010   9524    5   25
58006   9524    5   25
58008   9524    5   25
59001   9524    5   25
65004   9523    1   25
65004   9523    1   25
75003   9522    1   25
75003   9522    1   25
76007   9521    4   25
77002   9521    4   25
77003   9521    4   25
78003   9521    4   25
74001   9520    1   25
39093   9539    2   24
41006   9539    2   24
48007   9538    2   24
48011   9538    2   24
58008   9537    3   24
66001   9537    3   24
68001   9537    3   24
54057   9536    1   24
33021   9535    3   24
33029   9535    3   24
34001   9535    3   24
32010   9534    2   24
33023   9534    2   24
45012   9533    4   24
47001   9533    4   24
48010   9533    4   24
50001   9533    4   24
49004   9532    1   24
9002    9531    2   24
67008   9531    2   24
40011   9530    1   24
40011   9530    1   24
42003   9529    2   24
42011   9529    2   24
42003   9529    2   24
42011   9529    2   24
42003   9529    2   24
42011   9529    2   24
55023   9528    1   24
55023   9528    1   24
55023   9528    1   24
40012   9527    3   24
43007   9527    3   24
47011   9527    3   24
40012   9527    3   24
43007   9527    3   24
47011   9527    3   24
52004   9526    4   24
52005   9526    4   24
52006   9526    4   24
52007   9526    4   24
52004   9526    4   24
52005   9526    4   24
52006   9526    4   24
52007   9526    4   24
19001   9525    1   24
19001   9525    1   24
57008   9524    5   24
57010   9524    5   24
58006   9524    5   24
58008   9524    5   24
59001   9524    5   24
57008   9524    5   24
57010   9524    5   24
58006   9524    5   24
58008   9524    5   24
59001   9524    5   24
65004   9523    1   24
65004   9523    1   24
75003   9522    1   24
75003   9522    1   24
76007   9521    4   24
77002   9521    4   24
77003   9521    4   24
78003   9521    4   24
76007   9521    4   24
77002   9521    4   24
77003   9521    4   24
78003   9521    4   24
74001   9520    1   24
74001   9520    1   24
33021   9518    1   24
55023   9528    1   22
40012   9527    3   22
43007   9527    3   22
47011   9527    3   22
52004   9526    4   22
52005   9526    4   22
52006   9526    4   22
52007   9526    4   22
19001   9525    1   22
57008   9524    5   22
57010   9524    5   22
58006   9524    5   22
58008   9524    5   22
59001   9524    5   22
65004   9523    1   22
75003   9522    1   22
76007   9521    4   22
77002   9521    4   22
77003   9521    4   22
78003   9521    4   22
74001   9520    1   22
33021   9518    1   22
40012   9527    3   21
43007   9527    3   21
47011   9527    3   21
52004   9526    4   21
52005   9526    4   21
52006   9526    4   21
52007   9526    4   21
19001   9525    1   21
57008   9524    5   21
57010   9524    5   21
58006   9524    5   21
58008   9524    5   21
59001   9524    5   21
65004   9523    1   21
75003   9522    1   21
76007   9521    4   21
77002   9521    4   21
77003   9521    4   21
78003   9521    4   21
74001   9520    1   21
33021   9518    1   21
52004   9526    4   18
52005   9526    4   18
52006   9526    4   18
52007   9526    4   18
19001   9525    1   18
57008   9524    5   18
57010   9524    5   18
58006   9524    5   18
58008   9524    5   18
59001   9524    5   18
65004   9523    1   18
75003   9522    1   18
76007   9521    4   18
77002   9521    4   18
77003   9521    4   18
78003   9521    4   18
74001   9520    1   18
33021   9518    1   18
19001   9525    1   14
57008   9524    5   14
57010   9524    5   14
58006   9524    5   14
58008   9524    5   14
59001   9524    5   14
65004   9523    1   14
75003   9522    1   14
76007   9521    4   14
77002   9521    4   14
77003   9521    4   14
78003   9521    4   14
74001   9520    1   14
33021   9518    1   14
57008   9524    5   13
57010   9524    5   13
58006   9524    5   13
58008   9524    5   13
59001   9524    5   13
65004   9523    1   13
75003   9522    1   13
76007   9521    4   13
77002   9521    4   13
77003   9521    4   13
78003   9521    4   13
74001   9520    1   13
33021   9518    1   13
65004   9523    1   8
75003   9522    1   8
76007   9521    4   8
77002   9521    4   8
77003   9521    4   8
78003   9521    4   8
74001   9520    1   8
33021   9518    1   8
75003   9522    1   7
76007   9521    4   7
77002   9521    4   7
77003   9521    4   7
78003   9521    4   7
74001   9520    1   7
33021   9518    1   7
76007   9521    4   6
77002   9521    4   6
77003   9521    4   6
78003   9521    4   6
74001   9520    1   6
33021   9518    1   6
74001   9520    1   2
33021   9518    1   2
33021   9518    1   1', header = TRUE)

Day column shows days;
Count column shows the sum of ID in that particular Day;
Count_sum column shows the sum of ID by blocks of 12 days, i.e. Day + Day -1 + Day -2 + Day -3 + Day -4 + Day -5 + Day -6 + Day -7 + Day -8 + Day -9 + Day -10 + Day -11.

e.g. 1) Count_sum = 29 because it represents the sum of 3 (Day 9535) + 2 (Day 9534) + 4 (Day 9533) + 1 (Day 9532) + 2 (Day 9531) + 1 (Day 9530) + 2 (Day 9529) + 1 (Day 9528) + 3 (Day 9527) + 4 (Day 9526) + 1 (Day 9525) + 5 (Day 9524);

2) Count_sum = 27 because of 3 (Day 9537) + 1 (Day 9536) + 3 (Day 9535) + 2 (Day 9534) + 4 (Day 9533) + 1 (Day 9532) + 2 (Day 9531) + 1 (Day 9530) + 2 Day 9529) + 1 (Day 9528) + 3 (Day 9527) + 4 (Day 9526);

etc, etc..

What I need to do is to add a 5th column (Episode_ID) to df which groups each 12-day episodes with a unique value from 1 to 21 (because in df there are 21 unique days).

Count_sum nearly group them correctly but there can be 2 or more 12-day episodes with same Count_sum values and that can also overlap within days.

My real data.frame contains >300,000 rows and I would like also to obtain a code that works for 12-day episodes (as df) but also for other data.frames grouped by 2,3,4,5,6,7,8,n days.

Here my expected output for df (12-day episodes):

ID     Day  Count Count_sum Episode_ID
33021   9535    3   29  1
33029   9535    3   29  1
34001   9535    3   29  1
32010   9534    2   29  1
33023   9534    2   29  1
45012   9533    4   29  1
47001   9533    4   29  1
48010   9533    4   29  1
50001   9533    4   29  1
49004   9532    1   29  1
9002    9531    2   29  1
67008   9531    2   29  1
40011   9530    1   29  1
42003   9529    2   29  1
42011   9529    2   29  1
55023   9528    1   29  1
40012   9527    3   29  1
43007   9527    3   29  1
47011   9527    3   29  1
52004   9526    4   29  1
52005   9526    4   29  1
52006   9526    4   29  1
52007   9526    4   29  1
19001   9525    1   29  1
57008   9524    5   29  1
57010   9524    5   29  1
58006   9524    5   29  1
58008   9524    5   29  1
59001   9524    5   29  1
58008   9537    3   27  2
66001   9537    3   27  2
68001   9537    3   27  2
54057   9536    1   27  2
33021   9535    3   27  2
33029   9535    3   27  2
34001   9535    3   27  2
32010   9534    2   27  2
33023   9534    2   27  2
45012   9533    4   27  2
47001   9533    4   27  2
48010   9533    4   27  2
50001   9533    4   27  2
49004   9532    1   27  2
9002    9531    2   27  2
67008   9531    2   27  2
40011   9530    1   27  2
42003   9529    2   27  2
42011   9529    2   27  2
55023   9528    1   27  2
40012   9527    3   27  2
43007   9527    3   27  2
47011   9527    3   27  2
52004   9526    4   27  2
52005   9526    4   27  2
52006   9526    4   27  2
52007   9526    4   27  2
32010   9534    2   27  3
33023   9534    2   27  3
45012   9533    4   27  3
47001   9533    4   27  3
48010   9533    4   27  3
50001   9533    4   27  3
49004   9532    1   27  3
9002    9531    2   27  3
67008   9531    2   27  3
40011   9530    1   27  3
42003   9529    2   27  3
42011   9529    2   27  3
55023   9528    1   27  3
40012   9527    3   27  3
43007   9527    3   27  3
47011   9527    3   27  3
52004   9526    4   27  3
52005   9526    4   27  3
52006   9526    4   27  3
52007   9526    4   27  3
19001   9525    1   27  3
57008   9524    5   27  3
57010   9524    5   27  3
58006   9524    5   27  3
58008   9524    5   27  3
59001   9524    5   27  3
65004   9523    1   27  3
49004   9532    1   26  4
9002    9531    2   26  4
67008   9531    2   26  4
40011   9530    1   26  4
42003   9529    2   26  4
42011   9529    2   26  4
55023   9528    1   26  4
40012   9527    3   26  4
43007   9527    3   26  4
47011   9527    3   26  4
52004   9526    4   26  4
52005   9526    4   26  4
52006   9526    4   26  4
52007   9526    4   26  4
19001   9525    1   26  4
57008   9524    5   26  4
57010   9524    5   26  4
58006   9524    5   26  4
58008   9524    5   26  4
59001   9524    5   26  4
65004   9523    1   26  4
75003   9522    1   26  4
76007   9521    4   26  4
77002   9521    4   26  4
77003   9521    4   26  4
78003   9521    4   26  4
48007   9538    2   25  5
48011   9538    2   25  5
58008   9537    3   25  5
66001   9537    3   25  5
68001   9537    3   25  5
54057   9536    1   25  5
33021   9535    3   25  5
33029   9535    3   25  5
34001   9535    3   25  5
32010   9534    2   25  5
33023   9534    2   25  5
45012   9533    4   25  5
47001   9533    4   25  5
48010   9533    4   25  5
50001   9533    4   25  5
49004   9532    1   25  5
9002    9531    2   25  5
67008   9531    2   25  5
40011   9530    1   25  5
42003   9529    2   25  5
42011   9529    2   25  5
55023   9528    1   25  5
40012   9527    3   25  5
43007   9527    3   25  5
47011   9527    3   25  5
54057   9536    1   25  6
33021   9535    3   25  6
33029   9535    3   25  6
34001   9535    3   25  6
32010   9534    2   25  6
33023   9534    2   25  6
45012   9533    4   25  6
47001   9533    4   25  6
48010   9533    4   25  6
50001   9533    4   25  6
49004   9532    1   25  6
9002    9531    2   25  6
67008   9531    2   25  6
40011   9530    1   25  6
42003   9529    2   25  6
42011   9529    2   25  6
55023   9528    1   25  6
40012   9527    3   25  6
43007   9527    3   25  6
47011   9527    3   25  6
52004   9526    4   25  6
52005   9526    4   25  6
52006   9526    4   25  6
52007   9526    4   25  6
19001   9525    1   25  6
45012   9533    4   25  7
47001   9533    4   25  7
48010   9533    4   25  7
50001   9533    4   25  7
49004   9532    1   25  7
9002    9531    2   25  7
67008   9531    2   25  7
40011   9530    1   25  7
42003   9529    2   25  7
42011   9529    2   25  7
55023   9528    1   25  7
40012   9527    3   25  7
43007   9527    3   25  7
47011   9527    3   25  7
52004   9526    4   25  7
52005   9526    4   25  7
52006   9526    4   25  7
52007   9526    4   25  7
19001   9525    1   25  7
57008   9524    5   25  7
57010   9524    5   25  7
58006   9524    5   25  7
58008   9524    5   25  7
59001   9524    5   25  7
65004   9523    1   25  7
75003   9522    1   25  7
9002    9531    2   25  8
67008   9531    2   25  8
40011   9530    1   25  8
42003   9529    2   25  8
42011   9529    2   25  8
55023   9528    1   25  8
40012   9527    3   25  8
43007   9527    3   25  8
47011   9527    3   25  8
52004   9526    4   25  8
52005   9526    4   25  8
52006   9526    4   25  8
52007   9526    4   25  8
19001   9525    1   25  8
57008   9524    5   25  8
57010   9524    5   25  8
58006   9524    5   25  8
58008   9524    5   25  8
59001   9524    5   25  8
65004   9523    1   25  8
75003   9522    1   25  8
76007   9521    4   25  8
77002   9521    4   25  8
77003   9521    4   25  8
78003   9521    4   25  8
74001   9520    1   25  8
39093   9539    2   24  9
41006   9539    2   24  9
48007   9538    2   24  9
48011   9538    2   24  9
58008   9537    3   24  9
66001   9537    3   24  9
68001   9537    3   24  9
54057   9536    1   24  9
33021   9535    3   24  9
33029   9535    3   24  9
34001   9535    3   24  9
32010   9534    2   24  9
33023   9534    2   24  9
45012   9533    4   24  9
47001   9533    4   24  9
48010   9533    4   24  9
50001   9533    4   24  9
49004   9532    1   24  9
9002    9531    2   24  9
67008   9531    2   24  9
40011   9530    1   24  9
42003   9529    2   24  9
42011   9529    2   24  9
55023   9528    1   24  9
40011   9530    1   24  10
42003   9529    2   24  10
42011   9529    2   24  10
55023   9528    1   24  10
40012   9527    3   24  10
43007   9527    3   24  10
47011   9527    3   24  10
52004   9526    4   24  10
52005   9526    4   24  10
52006   9526    4   24  10
52007   9526    4   24  10
19001   9525    1   24  10
57008   9524    5   24  10
57010   9524    5   24  10
58006   9524    5   24  10
58008   9524    5   24  10
59001   9524    5   24  10
65004   9523    1   24  10
75003   9522    1   24  10
76007   9521    4   24  10
77002   9521    4   24  10
77003   9521    4   24  10
78003   9521    4   24  10
74001   9520    1   24  10
42003   9529    2   24  11
42011   9529    2   24  11
55023   9528    1   24  11
40012   9527    3   24  11
43007   9527    3   24  11
47011   9527    3   24  11
52004   9526    4   24  11
52005   9526    4   24  11
52006   9526    4   24  11
52007   9526    4   24  11
19001   9525    1   24  11
57008   9524    5   24  11
57010   9524    5   24  11
58006   9524    5   24  11
58008   9524    5   24  11
59001   9524    5   24  11
65004   9523    1   24  11
75003   9522    1   24  11
76007   9521    4   24  11
77002   9521    4   24  11
77003   9521    4   24  11
78003   9521    4   24  11
74001   9520    1   24  11
33021   9518    1   24  11
55023   9528    1   22  12
40012   9527    3   22  12
43007   9527    3   22  12
47011   9527    3   22  12
52004   9526    4   22  12
52005   9526    4   22  12
52006   9526    4   22  12
52007   9526    4   22  12
19001   9525    1   22  12
57008   9524    5   22  12
57010   9524    5   22  12
58006   9524    5   22  12
58008   9524    5   22  12
59001   9524    5   22  12
65004   9523    1   22  12
75003   9522    1   22  12
76007   9521    4   22  12
77002   9521    4   22  12
77003   9521    4   22  12
78003   9521    4   22  12
74001   9520    1   22  12
33021   9518    1   22  12
40012   9527    3   21  13
43007   9527    3   21  13
47011   9527    3   21  13
52004   9526    4   21  13
52005   9526    4   21  13
52006   9526    4   21  13
52007   9526    4   21  13
19001   9525    1   21  13
57008   9524    5   21  13
57010   9524    5   21  13
58006   9524    5   21  13
58008   9524    5   21  13
59001   9524    5   21  13
65004   9523    1   21  13
75003   9522    1   21  13
76007   9521    4   21  13
77002   9521    4   21  13
77003   9521    4   21  13
78003   9521    4   21  13
74001   9520    1   21  13
33021   9518    1   21  13
52004   9526    4   18  14
52005   9526    4   18  14
52006   9526    4   18  14
52007   9526    4   18  14
19001   9525    1   18  14
57008   9524    5   18  14
57010   9524    5   18  14
58006   9524    5   18  14
58008   9524    5   18  14
59001   9524    5   18  14
65004   9523    1   18  14
75003   9522    1   18  14
76007   9521    4   18  14
77002   9521    4   18  14
77003   9521    4   18  14
78003   9521    4   18  14
74001   9520    1   18  14
33021   9518    1   18  14
19001   9525    1   14  15
57008   9524    5   14  15
57010   9524    5   14  15
58006   9524    5   14  15
58008   9524    5   14  15
59001   9524    5   14  15
65004   9523    1   14  15
75003   9522    1   14  15
76007   9521    4   14  15
77002   9521    4   14  15
77003   9521    4   14  15
78003   9521    4   14  15
74001   9520    1   14  15
33021   9518    1   14  15
57008   9524    5   13  16
57010   9524    5   13  16
58006   9524    5   13  16
58008   9524    5   13  16
59001   9524    5   13  16
65004   9523    1   13  16
75003   9522    1   13  16
76007   9521    4   13  16
77002   9521    4   13  16
77003   9521    4   13  16
78003   9521    4   13  16
74001   9520    1   13  16
33021   9518    1   13  16
65004   9523    1   8   17
75003   9522    1   8   17
76007   9521    4   8   17
77002   9521    4   8   17
77003   9521    4   8   17
78003   9521    4   8   17
74001   9520    1   8   17
33021   9518    1   8   17
75003   9522    1   7   18
76007   9521    4   7   18
77002   9521    4   7   18
77003   9521    4   7   18
78003   9521    4   7   18
74001   9520    1   7   18
33021   9518    1   7   18
76007   9521    4   6   19
77002   9521    4   6   19
77003   9521    4   6   19
78003   9521    4   6   19
74001   9520    1   6   19
33021   9518    1   6   19
74001   9520    1   2   20
33021   9518    1   2   20
33021   9518    1   1   21

If you see the output, within Count_sum = 27 there are 2 distinct episodes, 4 episodes for Count_sum = 25, 2 episodes for Count_sum = 24, etc..

The Episode_ID column starts from 1 to 21 where 1 is the episode with largest Count_group and when there are 2 or more episode with same Count_group they need to be ordered by Day decreasing = TRUE.

Here what I tried from (Update) Add index column to data.frame based on two columns but does not work:

1)

df$Episode_ID <- cumsum(c(1,abs(diff(df$Day)) > 1) + c(0,diff(df$Count_sum) != 0) > 0)

2)

library(data.table)
Episode_ID <-setDT(df)[, if(Count_sum[1L] < .N) ((seq_len(.N)-1) %/% Count_sum[1L])+1  
                      else as.numeric(Count_sum), rleid(Count_sum)][, rleid(V1)]
df = df[, Episode_ID := Episode_ID]

Any suggestion?

1条回答
虎瘦雄心在
2楼-- · 2019-06-14 19:13

I must admit I haven't fully understood all the details, in particular, there is no explicit definition of episode and the provided data appear to me as not fully matching the description of how Count_sum is computed.

Nevertheless, I was able to reproduce the expected results.

The proposed solution is based on the observation that Day is consisting of a number of monotonically decreasing sequences (which presumably is what the OP means by episodes). So, the task is to identify the breaks where a new sequence starts, to advance the sequence counter, and to number all subsequent rows of that sequence with that sequence id.

This is achieved by

library(data.table)   # CRAN version 1.10.4 used
setDT(expected)[, Sequence_ID := cumsum(Day - shift(Day, fill = -1L) > 0)]

Note that here the second data set provided by the OP is used to demonstrate that the computations are in line with the expected results. For instance, the first break happens between rows 29 and 30:

expected[28:31]
#      ID  Day Count Count_sum Episode_ID Sequence_ID
#1: 58008 9524     5        29          1           1
#2: 59001 9524     5        29          1           1
#3: 58008 9537     3        27          2           2
#4: 66001 9537     3        27          2           2

The expression has recognized the jump from day 9524 to 9537.

Unfortunately, there is a discrepancy at the end:

tail(expected, 11)
#       ID  Day Count Count_sum Episode_ID Sequence_ID
# 1: 74001 9520     1         7         18          18
# 2: 33021 9518     1         7         18          18
# 3: 76007 9521     4         6         19          19
# 4: 77002 9521     4         6         19          19
# 5: 77003 9521     4         6         19          19
# 6: 78003 9521     4         6         19          19
# 7: 74001 9520     1         6         19          19
# 8: 33021 9518     1         6         19          19
# 9: 74001 9520     1         2         20          20
#10: 33021 9518     1         2         20          20
#11: 33021 9518     1         1         21          20

The OP has assigned the last row to a new episode although the days are still in monotonically decreasing order. If this is just an error in the provided data, we are done.

If this is intentional, then changes in Count_sum have to be considered in the numbering of episodes by using data.table's handy rleid() function:

expected[, new_Episode_ID := rleid(Sequence_ID, Count_sum)]
tail(expected, 5L)
#      ID  Day Count Count_sum Episode_ID Sequence_ID new_Episode_ID
#1: 74001 9520     1         6         19          19             19
#2: 33021 9518     1         6         19          19             19
#3: 74001 9520     1         2         20          20             20
#4: 33021 9518     1         2         20          20             20
#5: 33021 9518     1         1         21          20             21

This can also be written more concisely as a one-liner

expected[, new_Episode_ID := rleid(cumsum(Day - shift(Day, fill = -1L) > 0), Count_sum)]

Data

expected <- structure(list(ID = c(33021L, 33029L, 34001L, 32010L, 33023L, 
45012L, 47001L, 48010L, 50001L, 49004L, 9002L, 67008L, 40011L, 
42003L, 42011L, 55023L, 40012L, 43007L, 47011L, 52004L, 52005L, 
52006L, 52007L, 19001L, 57008L, 57010L, 58006L, 58008L, 59001L, 
58008L, 66001L, 68001L, 54057L, 33021L, 33029L, 34001L, 32010L, 
33023L, 45012L, 47001L, 48010L, 50001L, 49004L, 9002L, 67008L, 
40011L, 42003L, 42011L, 55023L, 40012L, 43007L, 47011L, 52004L, 
52005L, 52006L, 52007L, 32010L, 33023L, 45012L, 47001L, 48010L, 
50001L, 49004L, 9002L, 67008L, 40011L, 42003L, 42011L, 55023L, 
40012L, 43007L, 47011L, 52004L, 52005L, 52006L, 52007L, 19001L, 
57008L, 57010L, 58006L, 58008L, 59001L, 65004L, 49004L, 9002L, 
67008L, 40011L, 42003L, 42011L, 55023L, 40012L, 43007L, 47011L, 
52004L, 52005L, 52006L, 52007L, 19001L, 57008L, 57010L, 58006L, 
58008L, 59001L, 65004L, 75003L, 76007L, 77002L, 77003L, 78003L, 
48007L, 48011L, 58008L, 66001L, 68001L, 54057L, 33021L, 33029L, 
34001L, 32010L, 33023L, 45012L, 47001L, 48010L, 50001L, 49004L, 
9002L, 67008L, 40011L, 42003L, 42011L, 55023L, 40012L, 43007L, 
47011L, 54057L, 33021L, 33029L, 34001L, 32010L, 33023L, 45012L, 
47001L, 48010L, 50001L, 49004L, 9002L, 67008L, 40011L, 42003L, 
42011L, 55023L, 40012L, 43007L, 47011L, 52004L, 52005L, 52006L, 
52007L, 19001L, 45012L, 47001L, 48010L, 50001L, 49004L, 9002L, 
67008L, 40011L, 42003L, 42011L, 55023L, 40012L, 43007L, 47011L, 
52004L, 52005L, 52006L, 52007L, 19001L, 57008L, 57010L, 58006L, 
58008L, 59001L, 65004L, 75003L, 9002L, 67008L, 40011L, 42003L, 
42011L, 55023L, 40012L, 43007L, 47011L, 52004L, 52005L, 52006L, 
52007L, 19001L, 57008L, 57010L, 58006L, 58008L, 59001L, 65004L, 
75003L, 76007L, 77002L, 77003L, 78003L, 74001L, 39093L, 41006L, 
48007L, 48011L, 58008L, 66001L, 68001L, 54057L, 33021L, 33029L, 
34001L, 32010L, 33023L, 45012L, 47001L, 48010L, 50001L, 49004L, 
9002L, 67008L, 40011L, 42003L, 42011L, 55023L, 40011L, 42003L, 
42011L, 55023L, 40012L, 43007L, 47011L, 52004L, 52005L, 52006L, 
52007L, 19001L, 57008L, 57010L, 58006L, 58008L, 59001L, 65004L, 
75003L, 76007L, 77002L, 77003L, 78003L, 74001L, 42003L, 42011L, 
55023L, 40012L, 43007L, 47011L, 52004L, 52005L, 52006L, 52007L, 
19001L, 57008L, 57010L, 58006L, 58008L, 59001L, 65004L, 75003L, 
76007L, 77002L, 77003L, 78003L, 74001L, 33021L, 55023L, 40012L, 
43007L, 47011L, 52004L, 52005L, 52006L, 52007L, 19001L, 57008L, 
57010L, 58006L, 58008L, 59001L, 65004L, 75003L, 76007L, 77002L, 
77003L, 78003L, 74001L, 33021L, 40012L, 43007L, 47011L, 52004L, 
52005L, 52006L, 52007L, 19001L, 57008L, 57010L, 58006L, 58008L, 
59001L, 65004L, 75003L, 76007L, 77002L, 77003L, 78003L, 74001L, 
33021L, 52004L, 52005L, 52006L, 52007L, 19001L, 57008L, 57010L, 
58006L, 58008L, 59001L, 65004L, 75003L, 76007L, 77002L, 77003L, 
78003L, 74001L, 33021L, 19001L, 57008L, 57010L, 58006L, 58008L, 
59001L, 65004L, 75003L, 76007L, 77002L, 77003L, 78003L, 74001L, 
33021L, 57008L, 57010L, 58006L, 58008L, 59001L, 65004L, 75003L, 
76007L, 77002L, 77003L, 78003L, 74001L, 33021L, 65004L, 75003L, 
76007L, 77002L, 77003L, 78003L, 74001L, 33021L, 75003L, 76007L, 
77002L, 77003L, 78003L, 74001L, 33021L, 76007L, 77002L, 77003L, 
78003L, 74001L, 33021L, 74001L, 33021L, 33021L), Day = c(9535L, 
9535L, 9535L, 9534L, 9534L, 9533L, 9533L, 9533L, 9533L, 9532L, 
9531L, 9531L, 9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 
9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 9524L, 
9524L, 9537L, 9537L, 9537L, 9536L, 9535L, 9535L, 9535L, 9534L, 
9534L, 9533L, 9533L, 9533L, 9533L, 9532L, 9531L, 9531L, 9530L, 
9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 9526L, 9526L, 9526L, 
9526L, 9534L, 9534L, 9533L, 9533L, 9533L, 9533L, 9532L, 9531L, 
9531L, 9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 9526L, 
9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 9524L, 9524L, 
9523L, 9532L, 9531L, 9531L, 9530L, 9529L, 9529L, 9528L, 9527L, 
9527L, 9527L, 9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 
9524L, 9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 9521L, 9521L, 
9538L, 9538L, 9537L, 9537L, 9537L, 9536L, 9535L, 9535L, 9535L, 
9534L, 9534L, 9533L, 9533L, 9533L, 9533L, 9532L, 9531L, 9531L, 
9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 9536L, 9535L, 
9535L, 9535L, 9534L, 9534L, 9533L, 9533L, 9533L, 9533L, 9532L, 
9531L, 9531L, 9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 
9526L, 9526L, 9526L, 9526L, 9525L, 9533L, 9533L, 9533L, 9533L, 
9532L, 9531L, 9531L, 9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 
9527L, 9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 
9524L, 9524L, 9523L, 9522L, 9531L, 9531L, 9530L, 9529L, 9529L, 
9528L, 9527L, 9527L, 9527L, 9526L, 9526L, 9526L, 9526L, 9525L, 
9524L, 9524L, 9524L, 9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 
9521L, 9521L, 9520L, 9539L, 9539L, 9538L, 9538L, 9537L, 9537L, 
9537L, 9536L, 9535L, 9535L, 9535L, 9534L, 9534L, 9533L, 9533L, 
9533L, 9533L, 9532L, 9531L, 9531L, 9530L, 9529L, 9529L, 9528L, 
9530L, 9529L, 9529L, 9528L, 9527L, 9527L, 9527L, 9526L, 9526L, 
9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 9524L, 9524L, 9523L, 
9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 9529L, 9529L, 9528L, 
9527L, 9527L, 9527L, 9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 
9524L, 9524L, 9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 9521L, 
9521L, 9520L, 9518L, 9528L, 9527L, 9527L, 9527L, 9526L, 9526L, 
9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 9524L, 9524L, 9523L, 
9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 9518L, 9527L, 9527L, 
9527L, 9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 
9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 
9518L, 9526L, 9526L, 9526L, 9526L, 9525L, 9524L, 9524L, 9524L, 
9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 
9518L, 9525L, 9524L, 9524L, 9524L, 9524L, 9524L, 9523L, 9522L, 
9521L, 9521L, 9521L, 9521L, 9520L, 9518L, 9524L, 9524L, 9524L, 
9524L, 9524L, 9523L, 9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 
9518L, 9523L, 9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 9518L, 
9522L, 9521L, 9521L, 9521L, 9521L, 9520L, 9518L, 9521L, 9521L, 
9521L, 9521L, 9520L, 9518L, 9520L, 9518L, 9518L), Count = c(3L, 
3L, 3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 
3L, 3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 3L, 3L, 3L, 1L, 
3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 
1L, 2L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 
5L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 
1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 4L, 4L, 4L, 2L, 2L, 3L, 3L, 
3L, 1L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 2L, 2L, 1L, 2L, 
2L, 1L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 2L, 4L, 4L, 4L, 4L, 1L, 
2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 4L, 4L, 
4L, 4L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 
1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 4L, 4L, 
4L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 1L, 3L, 3L, 3L, 2L, 2L, 4L, 
4L, 4L, 4L, 1L, 2L, 2L, 1L, 2L, 2L, 1L, 1L, 2L, 2L, 1L, 3L, 3L, 
3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 4L, 4L, 
4L, 1L, 2L, 2L, 1L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 
5L, 5L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 3L, 3L, 3L, 4L, 4L, 
4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 
3L, 3L, 3L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 
4L, 4L, 4L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 
1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 
4L, 4L, 4L, 1L, 1L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 4L, 4L, 4L, 4L, 
1L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 4L, 4L, 4L, 4L, 1L, 
1L, 4L, 4L, 4L, 4L, 1L, 1L, 1L, 1L, 1L), Count_sum = c(29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 29L, 
29L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 27L, 
27L, 27L, 27L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 26L, 
26L, 26L, 26L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 25L, 
25L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 
24L, 24L, 24L, 24L, 24L, 24L, 24L, 24L, 22L, 22L, 22L, 22L, 22L, 
22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 22L, 
22L, 22L, 22L, 22L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 
21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 21L, 18L, 
18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 18L, 
18L, 18L, 18L, 18L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
14L, 14L, 14L, 14L, 14L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
13L, 13L, 13L, 13L, 13L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 7L, 
7L, 7L, 7L, 7L, 7L, 7L, 6L, 6L, 6L, 6L, 6L, 6L, 2L, 2L, 1L), 
    Episode_ID = c(1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
    1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 
    3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 
    4L, 4L, 4L, 4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 
    5L, 5L, 5L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 
    6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 
    7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 7L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 8L, 
    8L, 8L, 8L, 8L, 8L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 
    9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 9L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 
    10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 10L, 11L, 
    11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 
    11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 11L, 12L, 
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 
    12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 12L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 13L, 
    13L, 13L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 14L, 14L, 
    14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 14L, 
    15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 15L, 
    15L, 15L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 16L, 
    16L, 16L, 16L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 17L, 18L, 
    18L, 18L, 18L, 18L, 18L, 18L, 19L, 19L, 19L, 19L, 19L, 19L, 
    20L, 20L, 21L)), .Names = c("ID", "Day", "Count", "Count_sum", 
"Episode_ID"), row.names = c(NA, -395L), class = "data.frame")
查看更多
登录 后发表回答