Checking if Date is Between two Dates in R -
I have two large datasets, df1 and df2. The first dataset, df1, contains the columns 'id' and 'actual.date'.
# Build the first example data set: one row per (id, actual.date) pair.
df1 <- data.frame(
  id = c(1, 1, 1, 2, 3, 4, 4),
  actual.date = c(
    "10/01/1997", "2/01/1998", "5/01/2002", "7/01/1999",
    "9/01/2005", "5/01/2006", "2/03/2003"
  )
)

# Convert every column whose name ends in "date" from text to Date.
# `%Y` (four-digit year) is required here: `%y` would read only two digits
# and turn "1997" into the year 2019.
dcis <- grep("date$", names(df1))
df1[dcis] <- lapply(df1[dcis], as.Date, format = "%m/%d/%Y")

df1
#   id actual.date
# 1  1  1997-10-01
# 2  1  1998-02-01
# 3  1  2002-05-01
# 4  2  1999-07-01
# 5  3  2005-09-01
# 6  4  2006-05-01
# 7  4  2003-02-03
The second dataset, df2, contains two date fields, 'before.date' and 'after.date', which represent the start and end date, respectively:
# Build the second example data set: one row per (id, date interval), where
# before.date is the interval start and after.date is the interval end.
df2 <- data.frame(
  id = c(1, 1, 1, 2, 3, 4, 4, 4),
  before.date = c(
    "10/1/1996", "1/1/1998", "1/1/2000", "1/1/2001",
    "1/1/2001", "1/1/2001", "10/1/2004", "10/3/2004"
  ),
  after.date = c(
    "12/1/1996", "9/30/2003", "12/31/2004", "3/31/2006",
    "9/30/2006", "9/30/2005", "12/30/2004", "11/28/2004"
  )
)

# Convert every column whose name ends in "date" from text to Date.
# `%Y` (four-digit year) is required here: `%y` would read only two digits
# and turn "1996" into the year 2019.
dcis <- grep("date$", names(df2))
df2[dcis] <- lapply(df2[dcis], as.Date, format = "%m/%d/%Y")

df2
#   id before.date after.date
# 1  1  1996-10-01 1996-12-01
# 2  1  1998-01-01 2003-09-30
# 3  1  2000-01-01 2004-12-31
# 4  2  2001-01-01 2006-03-31
# 5  3  2001-01-01 2006-09-30
# 6  4  2001-01-01 2005-09-30
# 7  4  2004-10-01 2004-12-30
# 8  4  2004-10-03 2004-11-28
My goal is to create a new column at the end of df1 named 'match' that indicates whether the 'actual.date' for each row in df1 is between the 'before.date' and 'after.date' of any of the observations with the same id in df2. If it is between, I want to give the 'match' column a value of 1, otherwise 0 (this includes instances where there is no 'id' match).
This is the output I am hoping for:
  id actual.date match
1  1  1997-10-01     0
2  1  1998-02-01     1
3  1  2002-05-01     1
4  2  1999-07-01     0
5  3  2005-09-01     1
7  4  2006-05-01     0
8  4  2003-02-03     1
I think this can be done with a for() loop, but I am not very knowledgeable in R.
sample data:
df1 structure(list(cikcode = c("20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "1750"), auditorkey = c("4", "4", "5", "5", "5", "5", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "4"), yearendeddate = structure(c(4l, 4l, 2l, 2l, 3l, 3l, 5l, 5l, 6l, 6l, 7l, 7l, 8l, 8l, 9l, 9l, 10l, 10l, 11l, 1l), .label = c("2000-05-31", "2000-12-30", "2001-12-29", "2002-12-28", "2004-01-03", "2005-01-01", "2005-12-31", "2006-12-30", "2007-12-29", "2009-01-03", "2010-01-02" ), class = "factor"), source = structure(c(1l, 3l, 1l, 3l, 2l, 3l, 1l, 3l, 1l, 3l, 1l, 3l, 1l, 3l, 1l, 3l, 1l, 3l, 1l, 2l), .label = c("10-k", "10-k405", "def 14a"), class = "factor"), sourcedate = structure(c(6l, 7l, 2l, 3l, 4l, 5l, 8l, 9l, 10l, 20l, 11l, 12l, 13l, 14l, 15l, 16l, 17l, 18l, 19l, 1l), .label = c("2000-08-24", "2001-03-26", "2001-03-28", "2002-03-20", "2002-03-25", "2003-03-27", "2003-03-31", "2004-04-01", "2004-04-06", "2005-03-31", "2006-03-23", "2006-03-28", "2007-03-09", "2007-03-27", "2008-03-12", "2008-04-04", "2009-03-13", "2009-04-06", "2010-03-15", "2005-04-04"), class = "factor"), financialsdate = structure(c(4l, 4l, 2l, 2l, 3l, 3l, 5l, 5l, 6l, 6l, 7l, 7l, 8l, 8l, 9l, 9l, 10l, 10l, 11l, 1l), .label = c("2000-05-31", "2000-12-30", "2001-12-29", "2002-12-28", "2004-01-03", "2005-01-01", "2005-12-31", "2006-12-30", "2007-12-29", "2009-01-03", "2010-01-02" ), class = "factor"), auditopinionkey = c("3538", "na", "66900", "na", "78252", "na", "39225", "na", "84748", "na", "102979", "na", "120889", "na", "148621", "na", "171604", "na", "192814", "156138"), auditorstatecode = structure(c(2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, 1l), .label = c("il", "pa"), class = "factor"), auditorstatename = structure(c(2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, na, 2l, 1l), .label = c("illinois", "pennsylvania"), class = "factor"), goingconcern = structure(c(1l, na, 
1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, 1l), .label = "no", class = "factor"), goingconcernissuekeylist = structure(c(1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, 1l ), .label = "", class = "factor"), goingconcernissuephraselist = structure(c(1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, 1l), .label = "", class = "factor"), isadditionalopinion = structure(c(1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, na, 1l, 1l), .label = "no", class = "factor"), restatement = c("na", "0", "na", "0", "na", "0", "na", "0", "na", "0", "na", "0", "na", "0", "na", "0", "na", "0", "0", "na"), yearended = c("na", "2002", "na", "2000", "na", "2001", "na", "2003", "na", "2004", "na", "2005", "na", "2006", "na", "2007", "na", "2008", "2009", "na"), assets = c("50,459,000", "50,459,000", "54,421,000", "54,421,000", "47,644,000", "47,644,000", "83,081,000", "83,081,000", "93,016,000", "93,016,000", "89,110,000", "89,110,000", "140,996,000", "140,996,000", "184,118,000", "184,118,000", "199,444,000", "199,444,000", "204,236,000", "740,998,000"), auditfees = c("123,700", "123,700", "200,000", "200,000", "185,000", "185,000", "137,100", "137,100", "225,000", "225,000", "244,000", "244,000", "574,000", "574,000", "669,000", "669,000", "680,000", "680,000", "643,000", "na"), auditor = c("kpmg llp", "kpmg llp", "arthur andersen llp", "arthur andersen llp", "arthur andersen llp", "arthur andersen llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "grant thornton llp", "kpmg llp"), earnings = c("3,284,000", "3,284,000", "5,838,000", "5,838,000", "1,048,000", "1,048,000", "na", "", "na", "", "na", "", "na", "", "21,321,000", "21,321,000", "25,773,000", "25,773,000", 
"21,555,000", "35,163,000"), naicscode = c("334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334513", "334613"), revenue = c("68,231,000", "68,231,000", "84,912,000", "84,912,000", "71,819,000", "71,819,000", "94,676,000", "94,676,000", "112,494,000", "112,494,000", "118,940,000", "118,940,000", "148,127,008", "148,127,008", "201,677,000", "201,677,000", "243,018,000", "243,018,000", "190,774,000", "1,024,333,000"), siccode = c("3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823", "3720"), statecode = c("nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "nj", "il"), statename = c("new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "new jersey", "illinois"), ticker = c("na", "", "na", "", "na", "", "na", "", "na", "", "na", "", "na", "", "na", "", "na", "", "", "air")), .names = c("cikcode", "auditorkey", "yearendeddate", "source", "sourcedate", "financialsdate", "auditopinionkey", "auditorstatecode", "auditorstatename", "goingconcern", "goingconcernissuekeylist", "goingconcernissuephraselist", "isadditionalopinion", "restatement", "yearended", "assets", "auditfees", "auditor", "earnings", "naicscode", "revenue", "siccode", "statecode", "statename", "ticker"), row.names = c(na, 20l), class = "data.frame") df2 structure(list(cikcode = c(320193l, 72971l, 72971l, 200406l, 40545l, 40545l, 1114448l, 19617l, 19617l, 1067983l, 70858l, 313807l, 1578845l, 1113172l, 64803l, 1135644l, 731766l, 14272l, 14272l, 66740l), auditoratdisclosuredate = c("kpmg llp", "kpmg llp", "kpmg llp", 
"pricewaterhousecoopers llp", "kpmg llp", "kpmg llp", "pricewaterhousecoopers llp", "pricewaterhousecoopers llp", "pricewaterhousecoopers llp", "deloitte & touche llp", "pricewaterhousecoopers llp", "ernst & young llp", "pricewaterhousecoopers llp", "pricewaterhousecoopers llp", "kpmg llp", "kpmg llp", "deloitte & touche llp", "pricewaterhousecoopers llp", "pricewaterhousecoopers llp", "pricewaterhousecoopers llp"), auditoratdisclosuredatekey = c("4", "4", "4", "1", "4", "4", "1", "1", "1", "3", "1", "2", "1", "1", "4", "4", "3", "1", "1", "1"), auditorduringrestatedperiod = c("|kpmg llp|", "|kpmg llp|", "|kpmg llp|", "|pricewaterhousecoopers llp|", "|kpmg llp|", "|kpmg llp|", "|pricewaterhousecoopers llp|pricewaterhousecoopers llp (pricewaterhousecoopers ag, switzerland)|", "|pricewaterhousecoopers llp|", "|pricewaterhousecoopers llp|", "|deloitte & touche llp|", "|pricewaterhousecoopers llp|pricewaterhousecoopers llp (price waterhouse & co srl, argentina)|", "|ernst & young llp|ernst & young llp (ernst & young llp, united kingdom)|", "|pricewaterhousecoopers llp|", "|deloitte & touche llp (deloitte touche tohmatsu auditores independentes, brazil)|deloitte & touche llp (kpmg auditores independentes, brazil)|kpmg llp (deloitte touche tohmatsu auditores independentes, brazil)|kpmg llp (kpmg auditores independentes, brazil)|", "|kpmg llp|", "|kpmg llp (kpmg deutsche treuhand-gesellschaft ag wpg, germany)|", "|arthur andersen llp|deloitte & touche llp|", "|pricewaterhousecoopers llp|", "|pricewaterhousecoopers llp|", "|pricewaterhousecoopers llp|" ), auditorduringrestatedperiodkeys = c("|4|", "|4|", "|4|", "|1|", "|4|", "|4|", "|1|", "|1|", "|1|", "|3|", "|1|", "|2|", "|1|", "|3|4|", "|4|", "|4|", "|5|3|", "|1|", "|1|", "|1|" ), auditoropinionperiodendduringrestatedperiod = c("|kpmg llp|", "|kpmg llp|", "|kpmg llp|", "", "|kpmg llp|", "|kpmg llp|", "|pricewaterhousecoopers llp[pricewaterhousecoopers ag]|", "|pricewaterhousecoopers llp|", "", "", 
"|pricewaterhousecoopers llp|", "|ernst & young llp[ernst & young llp]|", "|pricewaterhousecoopers llp|", "|kpmg llp[kpmg auditores independentes]|", "", "|kpmg llp[kpmg deutsche treuhand-gesellschaft ag wpg]|", "|arthur andersen llp|deloitte & touche llp|", "|pricewaterhousecoopers llp|", "|pricewaterhousecoopers llp|", "|pricewaterhousecoopers llp|" ), auditoropinionperiodendduringrestatedperiodkeys = c("|4|", "|4|", "|4|", "", "|4|", "|4|", "|1|", "|1|", "", "", "|1|", "|2|", "|1|", "|4|", "", "|4|", "|5|3|", "|1|", "|1|", "|1|" ), restatementkey = c("10196", "12617", "42632", "44280", "857", "22310", "7995", "10187", "38972", "5379", "6357", "9495", "49566", "32005", "45575", "15890", "13410", "695", "3752", "36923"), restatedperiodbegin = c("1996-10-01", "1998-01-01", "2005-01-01", "2012-01-01", "2001-01-01", "2001-01-01", "2003-01-01", "2003-01-01", "2012-01-01", "2001-07-01", "2001-01-01", "2000-01-01", "2014-01-01", "2007-01-01", "1997-01-01", "2000-10-01", "1994-01-01", "2001-01-01", "1999-01-01", "2010-01-01"), restatedperiodended = c("2006-04-01", "2003-09-30", "2007-09-30", "2012-07-01", "2006-03-31", "2006-09-30", "2004-12-31", "2006-03-31", "2012-03-31", "2001-09-30", "2005-09-30", "2004-12-31", "2014-12-31", "2008-12-31", "1998-12-31", "2006-09-30", "2006-09-30", "2003-09-30", "2002-06-30", "2011-03-31"), disclosure = c("press release", "10-q/a", "10-k", "10-q", "8-k", "8-k", "20-f", "8-k", "8-k", "10-q/a", "8-k", "20-f/a", "10-q", "20-f", "8-k", "20-f", "8-k", "8-k", "press release", "10-q"), disclosuredate = c("2006-06-29", "2004-01-16", "2008-02-29", "2012-11-09", "2005-05-06", "2007-01-19", "2006-01-30", "2006-08-03", "2012-07-13", "2002-03-15", "2006-02-22", "2006-06-13", "2015-05-11", "2010-04-21", "1999-11-15", "2006-12-11", "2006-04-07", "2004-01-29", "2002-10-22", "2011-08-04"), cumulativechangeinnetincome = c(nan, nan, 0, 0, 3.04344437838208e-315, nan, nan, 0, nan, 0, 2.66795448754273e-315, 0, 0, 0, 0, nan, nan, nan, nan, 0), 
accountingrulegaapfasbapplicationfailures = c("|deferred, stock-based and/or executive comp issues|deferred, stock-based options backdating (subcategory)|", "|lease, sfas 5, legal, contingency , commitment issues|lease, leasehold , fas 13 (98) (subcategory)|", "|cash flow statement (sfas 95) classification errors|lease, sfas 5, legal, contingency , commitment issues|", "|cash flow statement (sfas 95) classification errors|", "|financial derivatives/hedging (fas 133) acct issues|", "|financial derivatives/hedging (fas 133) acct issues|foreign, related party, affiliated, or subsidiary issues|", "|ppe intangible or fixed asset (value/diminution) issues|acquisitions, mergers, disposals, re-org acct issues|consolidation issues incl fin 46 variable interest & off-b/s|deferred, stock-based and/or executive comp issues|tax expense/benefit/deferral/other (fas 109) issues|gaap - changes in acct principles fasb/eitf or foreign gaap|acquisitions, mergers, (subcategory) acct issues|ppe issues - intangible assets, goodwill (subcategory)|pension , other post-retirement benefit issues|", "|accounts/loans receivable, investments & cash issues|cash flow statement (sfas 95) classification errors|", "|revenue recognition issues|", "|consolidation issues incl fin 46 variable interest & off-b/s|intercompany, investment in subs./affiliate issues|", "|financial derivatives/hedging (fas 133) acct issues|", "|revenue recognition issues|financial derivatives/hedging (fas 133) acct issues|inventory, vendor and/or cost of sales issues|gaap - changes in acct principles fasb/eitf or foreign gaap|", "|debt, quasi-debt, warrants & equity ( bcf) security issues|foreign, related party, affiliated, or subsidiary issues|cash flow statement (sfas 95) classification errors|foreign, subsidiary issues (subcategory)|", "", "|acquisitions, mergers, disposals, re-org acct issues|acquisitions, mergers, (subcategory) acct issues|", "|tax expense/benefit/deferral/other (fas 109) issues|", "|deferred, 
stock-based and/or executive comp issues|deferred, stock-based options backdating (subcategory)|", "|acquisitions, mergers, disposals, re-org acct issues|consolidation issues incl fin 46 variable interest & off-b/s|accounts/loans receivable, investments & cash issues|tax expense/benefit/deferral/other (fas 109) issues|fin statement, footnote & segment disclosure issues|pension , other post-retirement benefit issues|", "|revenue recognition issues|accounts/loans receivable, investments & cash issues|inventory, vendor and/or cost of sales issues|", "|cash flow statement (sfas 95) classification errors|"), financialfraudirregularitiesandmisrepresentations = c("", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""), errorsaccountingandclericalapplications = c("", "", "", "", "", "", "", "", "", "", "", "", "", "|eps, ratio , classification of income statement issues|", "", "", "", "", "", ""), othersignificantissues = c("", "", "", "", "", "", "", "", "|z - material weakness - section 404 or 302 issues identified|", "", "", "", "", "", "", "", "", "", "|y - loan covenant violations/issues|", ""), secinvestigation = c("", "", "y", "", "y", "y", "", "", "", "", "", "", "", "", "", "", "", "", "", "")), .names = c("cikcode", "auditoratdisclosuredate", "auditoratdisclosuredatekey", "auditorduringrestatedperiod", "auditorduringrestatedperiodkeys", "auditoropinionperiodendduringrestatedperiod", "auditoropinionperiodendduringrestatedperiodkeys", "restatementkey", "restatedperiodbegin", "restatedperiodended", "disclosure", "disclosuredate", "cumulativechangeinnetincome", "accountingrulegaapfasbapplicationfailures", "financialfraudirregularitiesandmisrepresentations", "errorsaccountingandclericalapplications", "othersignificantissues", "secinvestigation"), row.names = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20"), class = "data.frame")
You may use foverlaps from data.table. Convert both 'data.frame's to 'data.table's with 'start'/'end' columns. Set the key columns to the column names of each dataset. Then use foverlaps to get a numeric index, which can be converted to a binary 'match' column based on the NA values in it.
library(data.table) # v1.9.5+

# Flag each df1 row with match = 1 when its actual.date lies inside at least
# one [before.date, after.date] interval of df2 with the same id, else 0.

# Represent each df1 date as a zero-length interval (start == end) so it can
# be compared against the df2 intervals.
dt1 <- data.table(id = df1$id, start = df1$actual.date, end = df1$actual.date)
# Keying on all columns sorts dt1 by (id, start, end), so the result below
# is in key order rather than df1's original row order.
setkeyv(dt1, colnames(dt1))

# Rename the df2 interval columns so both tables share (id, start, end).
dt2 <- as.data.table(df2)
setnames(dt2, 2:3, c("start", "end"))
setkeyv(dt2, colnames(dt2))

# With which = TRUE and mult = "first", indx holds for every dt1 row the
# index of the first dt2 interval that contains it, or NA when none does.
indx <- foverlaps(dt1, dt2, type = "within", which = TRUE, mult = "first")

# Unary plus turns the logical "found a containing interval" into 0/1;
# the helper `end` column is then dropped.
dt1[, match := +(!is.na(indx))][, end := NULL]
setnames(dt1, 1:2, colnames(df1))

dt1
#    id actual.date match
# 1:  1  1997-10-01     0
# 2:  1  1998-02-01     1
# 3:  1  2002-05-01     1
# 4:  2  1999-07-01     0
# 5:  3  2005-09-01     1
# 6:  4  2003-02-03     1
# 7:  4  2006-05-01     0
Comments
Post a Comment