R - If date falls within range, then sum

2019-05-03 23:38发布

问题:

I have managed to do this in excel easily but want to complete this in R:

I have two data frames:

MediaPlanDF (215 obs, 29 var)

I am only concerned with 4 of the variables:

Start Date (flight date), End Date (flight date), Daily Spend, Daily impressions

OutputDF (35 obs, 1 var)

Date: OutputDF[[1]]

35 observations from 8/31/15 to 10/4/15

So that is the setup, or at least how I have set it up (all dates formatted properly).

What I need to do is essentially add 2 columns to ouputDF: Daily Impressions & Daily Spend

Daily Impressions = if the date in OutputDF falls within the start and end date of MediaPlanDF then sum up all #dailyimpressions that meet that criteria.

Daily Impressions = if the date in OutputDF falls within the start and end date of MediaPlanDF then sum up all #dailyspend that meet that criteria.

Here is an example of the two DFs:

MediaPlanDF (part i am concerned about):

#daysinflight   #dailyimpressions   #dailyspend Campaign name   Campaign ID Campaign flight start date  Campaign flight end date
35  392857.1429 1571.428571 A Real Advertiser   RAND0M  8/31/2015   10/4/2015
35  85714.28571 428.5714286 A Real Advertiser   RAND0M  8/31/2015   10/4/2015
35  142857.1429 714.2857143 A Real Advertiser   RAND0M  8/31/2015   10/4/2015
35  62857.14286 942.8571429 A Real Advertiser   RAND0M  8/31/2015   10/4/2015

OutputDF

Date
8/31/2015
9/1/2015
9/2/2015
9/3/2015
9/4/2015
9/5/2015
...
10/4/2015

Thanks for any help in advance.

Im getting this error:

Error in as.POSIXlt.character(as.character(x), ...) : character string is not in a standard unambiguous format

here is the dput

structure(list(site = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("Accuen", 
"Amazon", "Bleacher Report", "Brightroll", "Buzzfeed", "CBSi", 
"College Humor", "Complex", "ESPN", "GDN", "HULU", "IGN", "Millennial", 
"Nativo", "NBA", "NBC Sports", "Pandora", "Reddit", "Spotify", 
"Tremor", "TrueX", "Twitch", "Wikia", "Woven", "Yahoo!", "YouTube"
), class = "factor"), daysinflight = c(35L, 35L, 35L, 35L, 35L, 
35L), dailyimpressions = c(392857.1429, 85714.28571, 142857.1429, 
62857.14286, 17142.85714, 72380.94286), dailyspend = c(1571.428571, 
428.5714286, 714.2857143, 942.8571429, 428.5714286, 1085.714286
), Campaign.name = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "A Real Advertiser", class = "factor"), 
    Campaign.ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "RAND0M", class = "factor"), 
    startdate = structure(c(16678, 16678, 16678, 16678, 16678, 
    16678), class = "Date"), enddate = structure(c(16712, 16712, 
    16712, 16712, 16712, 16712), class = "Date"), Campaign.budget = c(5100206L, 
    5100206L, 5100206L, 5100206L, 5100206L, 5100206L), Campaign.planned.cost = c(4663350.2, 
    4663350.2, 4663350.2, 4663350.2, 4663350.2, 4663350.2), Campaign.buy.total = c(4663350.2, 
    4663350.2, 4663350.2, 4663350.2, 4663350.2, 4663350.2), Supplier = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("ACCUEN", "AMAZON (AMZN MED GP)", 
    "BRIGHTROLL", "BUZZFEED.COM", "CBS DIGITAL MEDIA", "COLLEGE HUMOR", 
    "COMPLEX.COM", "ESPN.COM", "GOOGLE DISPLAY NTWK", "HULU", 
    "IGN.COM", "MILLENNIAL MEDIA", "NATIVO.NET", "NBC.COM", "PANDORA MEDIA, INC.", 
    "REDDIT.COM", "SPOTIFY.COM", "TREMORMEDIA.COM", "TRUEX MEDIA INC.", 
    "TURNER MEDIA GROUP", "TWITCH.TV", "WIKIA.COM", "WOVENDIGITAL.COM", 
    "YAHOO! US", "YOUTUBE, LLC."), class = "factor"), Site = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = c("ACCUEN", "AMAZON.COM", "BLEACHER REPORT", 
    "BRIGHTROLL", "BUZZFEED.COM", "CBS INTERACTIVE", "COLLEGEHUMOR", 
    "COMPLEX", "COMPLEX.COM", "ELECTUS", "ESPN.COM", "GOOGLE DISPLAY NTWK", 
    "HULU", "IGN.COM", "MILLENNIAL MEDIA", "NATIVO", "NBA", "NBCSPORTS.COM", 
    "PANDORA", "REDDIT", "SPOTIFY", "TREMOR VIDEO", "TRUEX", 
    "TWITCH", "WIKIA", "WOVEN", "YAHOO", "YOUTUBE, LLC."), class = "factor"), 
    Flight.start.date = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("10/3/2015", 
    "8/31/2015", "9/1/2015", "9/10/2015", "9/11/2015", "9/13/2015", 
    "9/14/2015", "9/15/2015", "9/16/2015", "9/17/2015", "9/18/2015", 
    "9/2/2015", "9/20/2015", "9/21/2015", "9/24/2015", "9/25/2015", 
    "9/27/2015", "9/28/2015", "9/7/2015", "9/9/2015"), class = "factor"), 
    Flight.end.date = structure(c(2L, 2L, 2L, 2L, 2L, 2L), .Label = c("10/3/2015", 
    "10/4/2015", "9/11/2015", "9/13/2015", "9/15/2015", "9/17/2015", 
    "9/18/2015", "9/2/2015", "9/20/2015", "9/27/2015", "9/30/2015"
    ), class = "factor"), Cost.method = structure(c(3L, 3L, 3L, 
    3L, 3L, 3L), .Label = c("CPC", "CPE", "CPM", "Flat", "Free"
    ), class = "factor"), Rate = c(43, 15, 5, 125, 25, 15), Planned.unit.amount = c(13750000L, 
    3000000L, 5000000L, 2200000L, 600000L, 2533333L), Cost = c(55000, 
    15000, 25000, 33000, 15000, 38000), Excluded = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "No", class = "factor"), Company.ID = structure(c(1L, 
    1L, 1L, 1L, 1L, 1L), .Label = "skrt", class = "factor"), 
    Person.ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "smgboi", class = "factor"), 
    Exported.by = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "some guy", class = "factor"), 
    Exported.on = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "2015-10-06 18:53:12, EDT", class = "factor"), 
    Exported.from = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = "media", class = "factor")), .Names = c("site", 
"daysinflight", "dailyimpressions", "dailyspend", "Campaign.name", 
"Campaign.ID", "startdate", "enddate", "Campaign.budget", "Campaign.planned.cost", 
"Campaign.buy.total", "Supplier", "Site", "Placement.name", "Buy.details", 
"Positioning", "Unit.dimensions", "Flight.start.date", "Flight.end.date", 
"Cost.method", "Rate", "Planned.unit.amount", "Cost", "Excluded", 
"Company.ID", "Person.ID", "Exported.by", "Exported.on", "Exported.from"
), row.names = c(NA, 6L), class = "data.frame")

回答1:

First we need to make sure your date formats etc are correct. I'll assume they are, my versions are at the end. You have't provided a dput and have some weird column names, so double check spacing and capitals.

Next, let's construct an interval object for each campaign from the library lubridate:

library(lubridate)
MediaPlanDF$interval <- interval(MediaPlanDF$Campaign.flight.1, MediaPlanDF$end.date)

Now we can test if each item in outputDF is in each interval, and if so, sum it (your test data has all elements in all intervals):

output <- do.call(rbind, lapply(OutputDF$Date, function(x){
                              index <- x %within% MediaPlanDF$interval;
                              list(impressions = sum(MediaPlanDF$dailyimpressions[index]),
                                   spend = sum(MediaPlanDF$dailyspend[index]))}))

Where we get the output:

cbind(output, OutputDF)
  impressions    spend       Date
1    684285.7 3657.143 2015-08-31
2    684285.7 3657.143 2015-09-01
3    684285.7 3657.143 2015-09-02
4    684285.7 3657.143 2015-09-03
5    684285.7 3657.143 2015-09-04
6    684285.7 3657.143 2015-09-05

data:

OutputDF:

structure(list(Date = structure(c(16678, 16679, 16680, 16681, 
16682, 16683), class = "Date")), .Names = "Date", row.names = c(NA, 
-6L), class = "data.frame")

MediaPlanDF:

structure(list(daysinflight = c(35L, 35L, 35L, 35L), dailyimpressions = c(392857.1429, 
85714.28571, 142857.1429, 62857.14286), dailyspend = c(1571.428571, 
428.5714286, 714.2857143, 942.8571429), Campaign.name = structure(c(1L, 
1L, 1L, 1L), .Label = "A", class = "factor"), Campaign.ID = structure(c(1L, 
1L, 1L, 1L), .Label = "Real", class = "factor"), Campaign.flight = structure(c(1L, 
1L, 1L, 1L), .Label = "Advertiser", class = "factor"), start.date = structure(c(1L, 
1L, 1L, 1L), .Label = "RAND0M", class = "factor"), Campaign.flight.1 = structure(c(16678, 
16678, 16678, 16678), class = "Date"), end.date = structure(c(16712, 
16712, 16712, 16712), class = "Date")), .Names = c("daysinflight", 
"dailyimpressions", "dailyspend", "Campaign.name", "Campaign.ID", 
"Campaign.flight", "start.date", "Campaign.flight.1", "end.date"
), row.names = c(NA, -4L), class = "data.frame")


标签: r date sumifs