df = read.table(text = 'ID Day Count Count_sum
33021 9535 3 29
33029 9535 3 29
34001 9535 3 29
32010 9534 2 29
33023 9534 2 29
45012 9533 4 29
47001 9533 4 29
48010 9533 4 29
50001 9533 4 29
49004 9532 1 29
9002 9531 2 29
67008 9531 2 29
40011 9530 1 29
42003 9529 2 29
42011 9529 2 29
55023 9528 1 29
40012 9527 3 29
43007 9527 3 29
47011 9527 3 29
52004 9526 4 29
52005 9526 4 29
52006 9526 4 29
52007 9526 4 29
19001 9525 1 29
57008 9524 5 29
57010 9524 5 29
58006 9524 5 29
58008 9524 5 29
59001 9524 5 29
58008 9537 3 27
66001 9537 3 27
68001 9537 3 27
54057 9536 1 27
33021 9535 3 27
33029 9535 3 27
34001 9535 3 27
32010 9534 2 27
33023 9534 2 27
32010 9534 2 27
33023 9534 2 27
45012 9533 4 27
47001 9533 4 27
48010 9533 4 27
50001 9533 4 27
45012 9533 4 27
47001 9533 4 27
48010 9533 4 27
50001 9533 4 27
49004 9532 1 27
49004 9532 1 27
9002 9531 2 27
67008 9531 2 27
9002 9531 2 27
67008 9531 2 27
40011 9530 1 27
40011 9530 1 27
42003 9529 2 27
42011 9529 2 27
42003 9529 2 27
42011 9529 2 27
55023 9528 1 27
55023 9528 1 27
40012 9527 3 27
43007 9527 3 27
47011 9527 3 27
40012 9527 3 27
43007 9527 3 27
47011 9527 3 27
52004 9526 4 27
52005 9526 4 27
52006 9526 4 27
52007 9526 4 27
52004 9526 4 27
52005 9526 4 27
52006 9526 4 27
52007 9526 4 27
19001 9525 1 27
57008 9524 5 27
57010 9524 5 27
58006 9524 5 27
58008 9524 5 27
59001 9524 5 27
65004 9523 1 27
49004 9532 1 26
9002 9531 2 26
67008 9531 2 26
40011 9530 1 26
42003 9529 2 26
42011 9529 2 26
55023 9528 1 26
40012 9527 3 26
43007 9527 3 26
47011 9527 3 26
52004 9526 4 26
52005 9526 4 26
52006 9526 4 26
52007 9526 4 26
19001 9525 1 26
57008 9524 5 26
57010 9524 5 26
58006 9524 5 26
58008 9524 5 26
59001 9524 5 26
65004 9523 1 26
75003 9522 1 26
76007 9521 4 26
77002 9521 4 26
77003 9521 4 26
78003 9521 4 26
48007 9538 2 25
48011 9538 2 25
58008 9537 3 25
66001 9537 3 25
68001 9537 3 25
54057 9536 1 25
54057 9536 1 25
33021 9535 3 25
33029 9535 3 25
34001 9535 3 25
33021 9535 3 25
33029 9535 3 25
34001 9535 3 25
32010 9534 2 25
33023 9534 2 25
32010 9534 2 25
33023 9534 2 25
45012 9533 4 25
47001 9533 4 25
48010 9533 4 25
50001 9533 4 25
45012 9533 4 25
47001 9533 4 25
48010 9533 4 25
50001 9533 4 25
45012 9533 4 25
47001 9533 4 25
48010 9533 4 25
50001 9533 4 25
49004 9532 1 25
49004 9532 1 25
49004 9532 1 25
9002 9531 2 25
67008 9531 2 25
9002 9531 2 25
67008 9531 2 25
9002 9531 2 25
67008 9531 2 25
9002 9531 2 25
67008 9531 2 25
40011 9530 1 25
40011 9530 1 25
40011 9530 1 25
40011 9530 1 25
42003 9529 2 25
42011 9529 2 25
42003 9529 2 25
42011 9529 2 25
42003 9529 2 25
42011 9529 2 25
42003 9529 2 25
42011 9529 2 25
55023 9528 1 25
55023 9528 1 25
55023 9528 1 25
55023 9528 1 25
40012 9527 3 25
43007 9527 3 25
47011 9527 3 25
40012 9527 3 25
43007 9527 3 25
47011 9527 3 25
40012 9527 3 25
43007 9527 3 25
47011 9527 3 25
40012 9527 3 25
43007 9527 3 25
47011 9527 3 25
52004 9526 4 25
52005 9526 4 25
52006 9526 4 25
52007 9526 4 25
52004 9526 4 25
52005 9526 4 25
52006 9526 4 25
52007 9526 4 25
52004 9526 4 25
52005 9526 4 25
52006 9526 4 25
52007 9526 4 25
19001 9525 1 25
19001 9525 1 25
19001 9525 1 25
57008 9524 5 25
57010 9524 5 25
58006 9524 5 25
58008 9524 5 25
59001 9524 5 25
57008 9524 5 25
57010 9524 5 25
58006 9524 5 25
58008 9524 5 25
59001 9524 5 25
65004 9523 1 25
65004 9523 1 25
75003 9522 1 25
75003 9522 1 25
76007 9521 4 25
77002 9521 4 25
77003 9521 4 25
78003 9521 4 25
74001 9520 1 25
39093 9539 2 24
41006 9539 2 24
48007 9538 2 24
48011 9538 2 24
58008 9537 3 24
66001 9537 3 24
68001 9537 3 24
54057 9536 1 24
33021 9535 3 24
33029 9535 3 24
34001 9535 3 24
32010 9534 2 24
33023 9534 2 24
45012 9533 4 24
47001 9533 4 24
48010 9533 4 24
50001 9533 4 24
49004 9532 1 24
9002 9531 2 24
67008 9531 2 24
40011 9530 1 24
40011 9530 1 24
42003 9529 2 24
42011 9529 2 24
42003 9529 2 24
42011 9529 2 24
42003 9529 2 24
42011 9529 2 24
55023 9528 1 24
55023 9528 1 24
55023 9528 1 24
40012 9527 3 24
43007 9527 3 24
47011 9527 3 24
40012 9527 3 24
43007 9527 3 24
47011 9527 3 24
52004 9526 4 24
52005 9526 4 24
52006 9526 4 24
52007 9526 4 24
52004 9526 4 24
52005 9526 4 24
52006 9526 4 24
52007 9526 4 24
19001 9525 1 24
19001 9525 1 24
57008 9524 5 24
57010 9524 5 24
58006 9524 5 24
58008 9524 5 24
59001 9524 5 24
57008 9524 5 24
57010 9524 5 24
58006 9524 5 24
58008 9524 5 24
59001 9524 5 24
65004 9523 1 24
65004 9523 1 24
75003 9522 1 24
75003 9522 1 24
76007 9521 4 24
77002 9521 4 24
77003 9521 4 24
78003 9521 4 24
76007 9521 4 24
77002 9521 4 24
77003 9521 4 24
78003 9521 4 24
74001 9520 1 24
74001 9520 1 24
33021 9518 1 24
55023 9528 1 22
40012 9527 3 22
43007 9527 3 22
47011 9527 3 22
52004 9526 4 22
52005 9526 4 22
52006 9526 4 22
52007 9526 4 22
19001 9525 1 22
57008 9524 5 22
57010 9524 5 22
58006 9524 5 22
58008 9524 5 22
59001 9524 5 22
65004 9523 1 22
75003 9522 1 22
76007 9521 4 22
77002 9521 4 22
77003 9521 4 22
78003 9521 4 22
74001 9520 1 22
33021 9518 1 22
40012 9527 3 21
43007 9527 3 21
47011 9527 3 21
52004 9526 4 21
52005 9526 4 21
52006 9526 4 21
52007 9526 4 21
19001 9525 1 21
57008 9524 5 21
57010 9524 5 21
58006 9524 5 21
58008 9524 5 21
59001 9524 5 21
65004 9523 1 21
75003 9522 1 21
76007 9521 4 21
77002 9521 4 21
77003 9521 4 21
78003 9521 4 21
74001 9520 1 21
33021 9518 1 21
52004 9526 4 18
52005 9526 4 18
52006 9526 4 18
52007 9526 4 18
19001 9525 1 18
57008 9524 5 18
57010 9524 5 18
58006 9524 5 18
58008 9524 5 18
59001 9524 5 18
65004 9523 1 18
75003 9522 1 18
76007 9521 4 18
77002 9521 4 18
77003 9521 4 18
78003 9521 4 18
74001 9520 1 18
33021 9518 1 18
19001 9525 1 14
57008 9524 5 14
57010 9524 5 14
58006 9524 5 14
58008 9524 5 14
59001 9524 5 14
65004 9523 1 14
75003 9522 1 14
76007 9521 4 14
77002 9521 4 14
77003 9521 4 14
78003 9521 4 14
74001 9520 1 14
33021 9518 1 14
57008 9524 5 13
57010 9524 5 13
58006 9524 5 13
58008 9524 5 13
59001 9524 5 13
65004 9523 1 13
75003 9522 1 13
76007 9521 4 13
77002 9521 4 13
77003 9521 4 13
78003 9521 4 13
74001 9520 1 13
33021 9518 1 13
65004 9523 1 8
75003 9522 1 8
76007 9521 4 8
77002 9521 4 8
77003 9521 4 8
78003 9521 4 8
74001 9520 1 8
33021 9518 1 8
75003 9522 1 7
76007 9521 4 7
77002 9521 4 7
77003 9521 4 7
78003 9521 4 7
74001 9520 1 7
33021 9518 1 7
76007 9521 4 6
77002 9521 4 6
77003 9521 4 6
78003 9521 4 6
74001 9520 1 6
33021 9518 1 6
74001 9520 1 2
33021 9518 1 2
33021 9518 1 1', header = TRUE)
Day
column shows days;
Count
column shows the sum of ID in that particular Day;
Count_sum
column shows the sum of ID
by blocks of 12 days, i.e. Day + Day -1 + Day -2 + Day -3 + Day -4 + Day -5 + Day -6 + Day -7 + Day -8 + Day -9 + Day -10 + Day -11.
e.g. 1) Count_sum = 29 because it represents the sum of 3 (Day 9535) + 2 (Day 9534) + 4 (Day 9533) + 1 (Day 9532) + 2 (Day 9531) + 1 (Day 9530) + 2 (Day 9529) + 1 (Day 9528) + 3 (Day 9527) + 4 (Day 9526) + 1 (Day 9525) + 5 (Day 9524);
2) Count_sum = 27 because of 3 (Day 9537) + 1 (Day 9536) + 3 (Day 9535) + 2 (Day 9534) + 4 (Day 9533) + 1 (Day 9532) + 2 (Day 9531) + 1 (Day 9530) + 2 Day 9529) + 1 (Day 9528) + 3 (Day 9527) + 4 (Day 9526);
etc, etc..
What I need to do is to add a 5th column (Episode_ID) to df
which groups each 12-day episodes with a unique value from 1 to 21 (because in df
there are 21 unique days).
Count_sum nearly group them correctly but there can be 2 or more 12-day episodes with same Count_sum values and that can also overlap within days.
My real data.frame contains >300,000 rows and I would like also to obtain a code that works for 12-day episodes (as df
) but also for other data.frames grouped by 2,3,4,5,6,7,8,n days.
Here my expected output for df
(12-day episodes):
ID Day Count Count_sum Episode_ID
33021 9535 3 29 1
33029 9535 3 29 1
34001 9535 3 29 1
32010 9534 2 29 1
33023 9534 2 29 1
45012 9533 4 29 1
47001 9533 4 29 1
48010 9533 4 29 1
50001 9533 4 29 1
49004 9532 1 29 1
9002 9531 2 29 1
67008 9531 2 29 1
40011 9530 1 29 1
42003 9529 2 29 1
42011 9529 2 29 1
55023 9528 1 29 1
40012 9527 3 29 1
43007 9527 3 29 1
47011 9527 3 29 1
52004 9526 4 29 1
52005 9526 4 29 1
52006 9526 4 29 1
52007 9526 4 29 1
19001 9525 1 29 1
57008 9524 5 29 1
57010 9524 5 29 1
58006 9524 5 29 1
58008 9524 5 29 1
59001 9524 5 29 1
58008 9537 3 27 2
66001 9537 3 27 2
68001 9537 3 27 2
54057 9536 1 27 2
33021 9535 3 27 2
33029 9535 3 27 2
34001 9535 3 27 2
32010 9534 2 27 2
33023 9534 2 27 2
45012 9533 4 27 2
47001 9533 4 27 2
48010 9533 4 27 2
50001 9533 4 27 2
49004 9532 1 27 2
9002 9531 2 27 2
67008 9531 2 27 2
40011 9530 1 27 2
42003 9529 2 27 2
42011 9529 2 27 2
55023 9528 1 27 2
40012 9527 3 27 2
43007 9527 3 27 2
47011 9527 3 27 2
52004 9526 4 27 2
52005 9526 4 27 2
52006 9526 4 27 2
52007 9526 4 27 2
32010 9534 2 27 3
33023 9534 2 27 3
45012 9533 4 27 3
47001 9533 4 27 3
48010 9533 4 27 3
50001 9533 4 27 3
49004 9532 1 27 3
9002 9531 2 27 3
67008 9531 2 27 3
40011 9530 1 27 3
42003 9529 2 27 3
42011 9529 2 27 3
55023 9528 1 27 3
40012 9527 3 27 3
43007 9527 3 27 3
47011 9527 3 27 3
52004 9526 4 27 3
52005 9526 4 27 3
52006 9526 4 27 3
52007 9526 4 27 3
19001 9525 1 27 3
57008 9524 5 27 3
57010 9524 5 27 3
58006 9524 5 27 3
58008 9524 5 27 3
59001 9524 5 27 3
65004 9523 1 27 3
49004 9532 1 26 4
9002 9531 2 26 4
67008 9531 2 26 4
40011 9530 1 26 4
42003 9529 2 26 4
42011 9529 2 26 4
55023 9528 1 26 4
40012 9527 3 26 4
43007 9527 3 26 4
47011 9527 3 26 4
52004 9526 4 26 4
52005 9526 4 26 4
52006 9526 4 26 4
52007 9526 4 26 4
19001 9525 1 26 4
57008 9524 5 26 4
57010 9524 5 26 4
58006 9524 5 26 4
58008 9524 5 26 4
59001 9524 5 26 4
65004 9523 1 26 4
75003 9522 1 26 4
76007 9521 4 26 4
77002 9521 4 26 4
77003 9521 4 26 4
78003 9521 4 26 4
48007 9538 2 25 5
48011 9538 2 25 5
58008 9537 3 25 5
66001 9537 3 25 5
68001 9537 3 25 5
54057 9536 1 25 5
33021 9535 3 25 5
33029 9535 3 25 5
34001 9535 3 25 5
32010 9534 2 25 5
33023 9534 2 25 5
45012 9533 4 25 5
47001 9533 4 25 5
48010 9533 4 25 5
50001 9533 4 25 5
49004 9532 1 25 5
9002 9531 2 25 5
67008 9531 2 25 5
40011 9530 1 25 5
42003 9529 2 25 5
42011 9529 2 25 5
55023 9528 1 25 5
40012 9527 3 25 5
43007 9527 3 25 5
47011 9527 3 25 5
54057 9536 1 25 6
33021 9535 3 25 6
33029 9535 3 25 6
34001 9535 3 25 6
32010 9534 2 25 6
33023 9534 2 25 6
45012 9533 4 25 6
47001 9533 4 25 6
48010 9533 4 25 6
50001 9533 4 25 6
49004 9532 1 25 6
9002 9531 2 25 6
67008 9531 2 25 6
40011 9530 1 25 6
42003 9529 2 25 6
42011 9529 2 25 6
55023 9528 1 25 6
40012 9527 3 25 6
43007 9527 3 25 6
47011 9527 3 25 6
52004 9526 4 25 6
52005 9526 4 25 6
52006 9526 4 25 6
52007 9526 4 25 6
19001 9525 1 25 6
45012 9533 4 25 7
47001 9533 4 25 7
48010 9533 4 25 7
50001 9533 4 25 7
49004 9532 1 25 7
9002 9531 2 25 7
67008 9531 2 25 7
40011 9530 1 25 7
42003 9529 2 25 7
42011 9529 2 25 7
55023 9528 1 25 7
40012 9527 3 25 7
43007 9527 3 25 7
47011 9527 3 25 7
52004 9526 4 25 7
52005 9526 4 25 7
52006 9526 4 25 7
52007 9526 4 25 7
19001 9525 1 25 7
57008 9524 5 25 7
57010 9524 5 25 7
58006 9524 5 25 7
58008 9524 5 25 7
59001 9524 5 25 7
65004 9523 1 25 7
75003 9522 1 25 7
9002 9531 2 25 8
67008 9531 2 25 8
40011 9530 1 25 8
42003 9529 2 25 8
42011 9529 2 25 8
55023 9528 1 25 8
40012 9527 3 25 8
43007 9527 3 25 8
47011 9527 3 25 8
52004 9526 4 25 8
52005 9526 4 25 8
52006 9526 4 25 8
52007 9526 4 25 8
19001 9525 1 25 8
57008 9524 5 25 8
57010 9524 5 25 8
58006 9524 5 25 8
58008 9524 5 25 8
59001 9524 5 25 8
65004 9523 1 25 8
75003 9522 1 25 8
76007 9521 4 25 8
77002 9521 4 25 8
77003 9521 4 25 8
78003 9521 4 25 8
74001 9520 1 25 8
39093 9539 2 24 9
41006 9539 2 24 9
48007 9538 2 24 9
48011 9538 2 24 9
58008 9537 3 24 9
66001 9537 3 24 9
68001 9537 3 24 9
54057 9536 1 24 9
33021 9535 3 24 9
33029 9535 3 24 9
34001 9535 3 24 9
32010 9534 2 24 9
33023 9534 2 24 9
45012 9533 4 24 9
47001 9533 4 24 9
48010 9533 4 24 9
50001 9533 4 24 9
49004 9532 1 24 9
9002 9531 2 24 9
67008 9531 2 24 9
40011 9530 1 24 9
42003 9529 2 24 9
42011 9529 2 24 9
55023 9528 1 24 9
40011 9530 1 24 10
42003 9529 2 24 10
42011 9529 2 24 10
55023 9528 1 24 10
40012 9527 3 24 10
43007 9527 3 24 10
47011 9527 3 24 10
52004 9526 4 24 10
52005 9526 4 24 10
52006 9526 4 24 10
52007 9526 4 24 10
19001 9525 1 24 10
57008 9524 5 24 10
57010 9524 5 24 10
58006 9524 5 24 10
58008 9524 5 24 10
59001 9524 5 24 10
65004 9523 1 24 10
75003 9522 1 24 10
76007 9521 4 24 10
77002 9521 4 24 10
77003 9521 4 24 10
78003 9521 4 24 10
74001 9520 1 24 10
42003 9529 2 24 11
42011 9529 2 24 11
55023 9528 1 24 11
40012 9527 3 24 11
43007 9527 3 24 11
47011 9527 3 24 11
52004 9526 4 24 11
52005 9526 4 24 11
52006 9526 4 24 11
52007 9526 4 24 11
19001 9525 1 24 11
57008 9524 5 24 11
57010 9524 5 24 11
58006 9524 5 24 11
58008 9524 5 24 11
59001 9524 5 24 11
65004 9523 1 24 11
75003 9522 1 24 11
76007 9521 4 24 11
77002 9521 4 24 11
77003 9521 4 24 11
78003 9521 4 24 11
74001 9520 1 24 11
33021 9518 1 24 11
55023 9528 1 22 12
40012 9527 3 22 12
43007 9527 3 22 12
47011 9527 3 22 12
52004 9526 4 22 12
52005 9526 4 22 12
52006 9526 4 22 12
52007 9526 4 22 12
19001 9525 1 22 12
57008 9524 5 22 12
57010 9524 5 22 12
58006 9524 5 22 12
58008 9524 5 22 12
59001 9524 5 22 12
65004 9523 1 22 12
75003 9522 1 22 12
76007 9521 4 22 12
77002 9521 4 22 12
77003 9521 4 22 12
78003 9521 4 22 12
74001 9520 1 22 12
33021 9518 1 22 12
40012 9527 3 21 13
43007 9527 3 21 13
47011 9527 3 21 13
52004 9526 4 21 13
52005 9526 4 21 13
52006 9526 4 21 13
52007 9526 4 21 13
19001 9525 1 21 13
57008 9524 5 21 13
57010 9524 5 21 13
58006 9524 5 21 13
58008 9524 5 21 13
59001 9524 5 21 13
65004 9523 1 21 13
75003 9522 1 21 13
76007 9521 4 21 13
77002 9521 4 21 13
77003 9521 4 21 13
78003 9521 4 21 13
74001 9520 1 21 13
33021 9518 1 21 13
52004 9526 4 18 14
52005 9526 4 18 14
52006 9526 4 18 14
52007 9526 4 18 14
19001 9525 1 18 14
57008 9524 5 18 14
57010 9524 5 18 14
58006 9524 5 18 14
58008 9524 5 18 14
59001 9524 5 18 14
65004 9523 1 18 14
75003 9522 1 18 14
76007 9521 4 18 14
77002 9521 4 18 14
77003 9521 4 18 14
78003 9521 4 18 14
74001 9520 1 18 14
33021 9518 1 18 14
19001 9525 1 14 15
57008 9524 5 14 15
57010 9524 5 14 15
58006 9524 5 14 15
58008 9524 5 14 15
59001 9524 5 14 15
65004 9523 1 14 15
75003 9522 1 14 15
76007 9521 4 14 15
77002 9521 4 14 15
77003 9521 4 14 15
78003 9521 4 14 15
74001 9520 1 14 15
33021 9518 1 14 15
57008 9524 5 13 16
57010 9524 5 13 16
58006 9524 5 13 16
58008 9524 5 13 16
59001 9524 5 13 16
65004 9523 1 13 16
75003 9522 1 13 16
76007 9521 4 13 16
77002 9521 4 13 16
77003 9521 4 13 16
78003 9521 4 13 16
74001 9520 1 13 16
33021 9518 1 13 16
65004 9523 1 8 17
75003 9522 1 8 17
76007 9521 4 8 17
77002 9521 4 8 17
77003 9521 4 8 17
78003 9521 4 8 17
74001 9520 1 8 17
33021 9518 1 8 17
75003 9522 1 7 18
76007 9521 4 7 18
77002 9521 4 7 18
77003 9521 4 7 18
78003 9521 4 7 18
74001 9520 1 7 18
33021 9518 1 7 18
76007 9521 4 6 19
77002 9521 4 6 19
77003 9521 4 6 19
78003 9521 4 6 19
74001 9520 1 6 19
33021 9518 1 6 19
74001 9520 1 2 20
33021 9518 1 2 20
33021 9518 1 1 21
If you see the output, within Count_sum
= 27 there are 2 distinct episodes, 4 episodes for Count_sum
= 25, 2 episodes for Count_sum
= 24, etc..
The Episode_ID column starts from 1 to 21 where 1 is the episode with largest Count_group and when there are 2 or more episode with same Count_group they need to be ordered by Day decreasing = TRUE.
Here what I tried from (Update) Add index column to data.frame based on two columns but does not work:
1)
df$Episode_ID <- cumsum(c(1,abs(diff(df$Day)) > 1) + c(0,diff(df$Count_sum) != 0) > 0)
2)
library(data.table)
Episode_ID <-setDT(df)[, if(Count_sum[1L] < .N) ((seq_len(.N)-1) %/% Count_sum[1L])+1
else as.numeric(Count_sum), rleid(Count_sum)][, rleid(V1)]
df = df[, Episode_ID := Episode_ID]
Any suggestion?