I have two large datasets, df1 and df2. The first dataset, df1, contains the columns 'ID' and 'actual.data'.
df1 <- data.frame(ID=c(1,1,1,2,3,4,4), actual.date=c('10/01/1997','2/01/1998','5/01/2002','7/01/1999','9/01/2005','5/01/2006','2/03/2003'));
dcis <- grep('date$',names(df1));
df1[dcis] <- lapply(df1[dcis],as.Date,'%m/%d/%Y');
df1;
ID actual.date
1 1 1997-10-01
2 1 1998-02-01
3 1 2002-05-01
4 2 1999-07-01
5 3 2005-09-01
6 4 2006-05-01
7 4 2003-02-03
The second dataset, df2, contains two date fields, 'before,date' and 'after.date', which represent a start and end date, respectively:
df2 <- data.frame(ID=c(1,1,1,2,3,4,4,4), before.date=c('10/1/1996','1/1/1998','1/1/2000','1/1/2001','1/1/2001','1/1/2001','10/1/2004','10/3/2004'), after.date=c('12/1/1996','9/30/2003','12/31/2004','3/31/2006','9/30/2006','9/30/2005','12/30/2004','11/28/2004') );
dcis <- grep('date$',names(df2));
df2[dcis] <- lapply(df2[dcis],as.Date,'%m/%d/%Y');
df2;
ID before.date after.date
1 1 1996-10-01 1996-12-01
2 1 1998-01-01 2003-09-30
3 1 2000-01-01 2004-12-31
4 2 2001-01-01 2006-03-31
5 3 2001-01-01 2006-09-30
6 4 2001-01-01 2005-09-30
7 4 2004-10-01 2004-12-30
8 4 2004-10-03 2004-11-28
My goal is to create a new column at the end of df1 named 'match' that indicates whether the 'actual.date' for each row in df1 is between the ‘before.date’ and ‘after.date’ of any of the observations of the same ID in df2. If it is between, I want to give the 'match' column a value of 1, otherwise 0 (includes instances with no 'ID' match).
This is the output I am hoping for:
ID before.date match
1 1 1997-10-01 0
2 1 1998-02-01 1
3 1 2002-05-01 1
4 2 1999-07-01 0
5 3 2005-09-01 1
7 4 2006-05-01 0
8 4 2003-02-03 1
I think this can be done with a for() loop, but I am not too knowledgeable about R.
Sample Data:
DF1
structure(list(cikcode = c("20", "20", "20", "20", "20", "20",
"20", "20", "20", "20", "20", "20", "20", "20", "20", "20", "20",
"20", "20", "1750"), auditorkey = c("4", "4", "5", "5", "5",
"5", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6", "6",
"6", "4"), yearendeddate = structure(c(4L, 4L, 2L, 2L, 3L, 3L,
5L, 5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 1L), .Label = c("2000-05-31",
"2000-12-30", "2001-12-29", "2002-12-28", "2004-01-03", "2005-01-01",
"2005-12-31", "2006-12-30", "2007-12-29", "2009-01-03", "2010-01-02"
), class = "factor"), source = structure(c(1L, 3L, 1L, 3L, 2L,
3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 3L, 1L, 2L), .Label = c("10-K",
"10-K405", "DEF 14A"), class = "factor"), sourcedate = structure(c(6L,
7L, 2L, 3L, 4L, 5L, 8L, 9L, 10L, 20L, 11L, 12L, 13L, 14L, 15L,
16L, 17L, 18L, 19L, 1L), .Label = c("2000-08-24", "2001-03-26",
"2001-03-28", "2002-03-20", "2002-03-25", "2003-03-27", "2003-03-31",
"2004-04-01", "2004-04-06", "2005-03-31", "2006-03-23", "2006-03-28",
"2007-03-09", "2007-03-27", "2008-03-12", "2008-04-04", "2009-03-13",
"2009-04-06", "2010-03-15", "2005-04-04"), class = "factor"),
financialsdate = structure(c(4L, 4L, 2L, 2L, 3L, 3L, 5L,
5L, 6L, 6L, 7L, 7L, 8L, 8L, 9L, 9L, 10L, 10L, 11L, 1L), .Label = c("2000-05-31",
"2000-12-30", "2001-12-29", "2002-12-28", "2004-01-03", "2005-01-01",
"2005-12-31", "2006-12-30", "2007-12-29", "2009-01-03", "2010-01-02"
), class = "factor"), auditopinionkey = c("3538", "NA", "66900",
"NA", "78252", "NA", "39225", "NA", "84748", "NA", "102979",
"NA", "120889", "NA", "148621", "NA", "171604", "NA", "192814",
"156138"), auditorstatecode = structure(c(2L, NA, 2L, NA,
2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L,
1L), .Label = c("IL", "PA"), class = "factor"), auditorstatename = structure(c(2L,
NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA, 2L, NA,
2L, NA, 2L, 1L), .Label = c("ILLINOIS", "PENNSYLVANIA"), class = "factor"),
goingconcern = structure(c(1L, NA, 1L, NA, 1L, NA, 1L, NA,
1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, 1L), .Label = "No", class = "factor"),
goingconcernissuekeylist = structure(c(1L, NA, 1L, NA, 1L,
NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, 1L
), .Label = "", class = "factor"), goingconcernissuephraselist = structure(c(1L,
NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA,
1L, NA, 1L, 1L), .Label = "", class = "factor"), isadditionalopinion = structure(c(1L,
NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA, 1L, NA,
1L, NA, 1L, 1L), .Label = "No", class = "factor"), restatement = c("NA",
"0", "NA", "0", "NA", "0", "NA", "0", "NA", "0", "NA", "0",
"NA", "0", "NA", "0", "NA", "0", "0", "NA"), yearended = c("NA",
"2002", "NA", "2000", "NA", "2001", "NA", "2003", "NA", "2004",
"NA", "2005", "NA", "2006", "NA", "2007", "NA", "2008", "2009",
"NA"), assets = c("50,459,000", "50,459,000", "54,421,000",
"54,421,000", "47,644,000", "47,644,000", "83,081,000", "83,081,000",
"93,016,000", "93,016,000", "89,110,000", "89,110,000", "140,996,000",
"140,996,000", "184,118,000", "184,118,000", "199,444,000",
"199,444,000", "204,236,000", "740,998,000"), auditfees = c("123,700",
"123,700", "200,000", "200,000", "185,000", "185,000", "137,100",
"137,100", "225,000", "225,000", "244,000", "244,000", "574,000",
"574,000", "669,000", "669,000", "680,000", "680,000", "643,000",
"NA"), auditor = c("KPMG LLP", "KPMG LLP", "Arthur Andersen LLP",
"Arthur Andersen LLP", "Arthur Andersen LLP", "Arthur Andersen LLP",
"Grant Thornton LLP", "Grant Thornton LLP", "Grant Thornton LLP",
"Grant Thornton LLP", "Grant Thornton LLP", "Grant Thornton LLP",
"Grant Thornton LLP", "Grant Thornton LLP", "Grant Thornton LLP",
"Grant Thornton LLP", "Grant Thornton LLP", "Grant Thornton LLP",
"Grant Thornton LLP", "KPMG LLP"), earnings = c("3,284,000",
"3,284,000", "5,838,000", "5,838,000", "1,048,000", "1,048,000",
"NA", "", "NA", "", "NA", "", "NA", "", "21,321,000", "21,321,000",
"25,773,000", "25,773,000", "21,555,000", "35,163,000"),
naicscode = c("334513", "334513", "334513", "334513", "334513",
"334513", "334513", "334513", "334513", "334513", "334513",
"334513", "334513", "334513", "334513", "334513", "334513",
"334513", "334513", "334613"), revenue = c("68,231,000",
"68,231,000", "84,912,000", "84,912,000", "71,819,000", "71,819,000",
"94,676,000", "94,676,000", "112,494,000", "112,494,000",
"118,940,000", "118,940,000", "148,127,008", "148,127,008",
"201,677,000", "201,677,000", "243,018,000", "243,018,000",
"190,774,000", "1,024,333,000"), siccode = c("3823", "3823",
"3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823",
"3823", "3823", "3823", "3823", "3823", "3823", "3823", "3823",
"3823", "3720"), statecode = c("NJ", "NJ", "NJ", "NJ", "NJ",
"NJ", "NJ", "NJ", "NJ", "NJ", "NJ", "NJ", "NJ", "NJ", "NJ",
"NJ", "NJ", "NJ", "NJ", "IL"), statename = c("NEW JERSEY",
"NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY",
"NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY",
"NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "NEW JERSEY",
"NEW JERSEY", "NEW JERSEY", "NEW JERSEY", "ILLINOIS"), ticker = c("NA",
"", "NA", "", "NA", "", "NA", "", "NA", "", "NA", "", "NA",
"", "NA", "", "NA", "", "", "AIR")), .Names = c("cikcode",
"auditorkey", "yearendeddate", "source", "sourcedate", "financialsdate",
"auditopinionkey", "auditorstatecode", "auditorstatename", "goingconcern",
"goingconcernissuekeylist", "goingconcernissuephraselist", "isadditionalopinion",
"restatement", "yearended", "assets", "auditfees", "auditor",
"earnings", "naicscode", "revenue", "siccode", "statecode", "statename",
"ticker"), row.names = c(NA, 20L), class = "data.frame")
DF2
structure(list(cikcode = c(320193L, 72971L, 72971L, 200406L,
40545L, 40545L, 1114448L, 19617L, 19617L, 1067983L, 70858L, 313807L,
1578845L, 1113172L, 64803L, 1135644L, 731766L, 14272L, 14272L,
66740L), auditoratdisclosuredate = c("KPMG LLP", "KPMG LLP",
"KPMG LLP", "PricewaterhouseCoopers LLP", "KPMG LLP", "KPMG LLP",
"PricewaterhouseCoopers LLP", "PricewaterhouseCoopers LLP", "PricewaterhouseCoopers LLP",
"Deloitte & Touche LLP", "PricewaterhouseCoopers LLP", "Ernst & Young LLP",
"PricewaterhouseCoopers LLP", "PricewaterhouseCoopers LLP", "KPMG LLP",
"KPMG LLP", "Deloitte & Touche LLP", "PricewaterhouseCoopers LLP",
"PricewaterhouseCoopers LLP", "PricewaterhouseCoopers LLP"),
auditoratdisclosuredatekey = c("4", "4", "4", "1", "4", "4",
"1", "1", "1", "3", "1", "2", "1", "1", "4", "4", "3", "1",
"1", "1"), auditorduringrestatedperiod = c("|KPMG LLP|",
"|KPMG LLP|", "|KPMG LLP|", "|PricewaterhouseCoopers LLP|",
"|KPMG LLP|", "|KPMG LLP|", "|PricewaterhouseCoopers LLP|PricewaterhouseCoopers LLP (PricewaterhouseCoopers AG, SWITZERLAND)|",
"|PricewaterhouseCoopers LLP|", "|PricewaterhouseCoopers LLP|",
"|Deloitte & Touche LLP|", "|PricewaterhouseCoopers LLP|PricewaterhouseCoopers LLP (Price Waterhouse & Co SRL, ARGENTINA)|",
"|Ernst & Young LLP|Ernst & Young LLP (Ernst & Young LLP, UNITED KINGDOM)|",
"|PricewaterhouseCoopers LLP|", "|Deloitte & Touche LLP (Deloitte Touche Tohmatsu Auditores Independentes, BRAZIL)|Deloitte & Touche LLP (KPMG Auditores Independentes, BRAZIL)|KPMG LLP (Deloitte Touche Tohmatsu Auditores Independentes, BRAZIL)|KPMG LLP (KPMG Auditores Independentes, BRAZIL)|",
"|KPMG LLP|", "|KPMG LLP (KPMG Deutsche Treuhand-Gesellschaft AG WPG, GERMANY)|",
"|Arthur Andersen LLP|Deloitte & Touche LLP|", "|PricewaterhouseCoopers LLP|",
"|PricewaterhouseCoopers LLP|", "|PricewaterhouseCoopers LLP|"
), auditorduringrestatedperiodkeys = c("|4|", "|4|", "|4|",
"|1|", "|4|", "|4|", "|1|", "|1|", "|1|", "|3|", "|1|", "|2|",
"|1|", "|3|4|", "|4|", "|4|", "|5|3|", "|1|", "|1|", "|1|"
), auditoropinionperiodendduringrestatedperiod = c("|KPMG LLP|",
"|KPMG LLP|", "|KPMG LLP|", "", "|KPMG LLP|", "|KPMG LLP|",
"|PricewaterhouseCoopers LLP[PricewaterhouseCoopers AG]|",
"|PricewaterhouseCoopers LLP|", "", "", "|PricewaterhouseCoopers LLP|",
"|Ernst & Young LLP[Ernst & Young LLP]|", "|PricewaterhouseCoopers LLP|",
"|KPMG LLP[KPMG Auditores Independentes]|", "", "|KPMG LLP[KPMG Deutsche Treuhand-Gesellschaft AG WPG]|",
"|Arthur Andersen LLP|Deloitte & Touche LLP|", "|PricewaterhouseCoopers LLP|",
"|PricewaterhouseCoopers LLP|", "|PricewaterhouseCoopers LLP|"
), auditoropinionperiodendduringrestatedperiodkeys = c("|4|",
"|4|", "|4|", "", "|4|", "|4|", "|1|", "|1|", "", "", "|1|",
"|2|", "|1|", "|4|", "", "|4|", "|5|3|", "|1|", "|1|", "|1|"
), restatementkey = c("10196", "12617", "42632", "44280",
"857", "22310", "7995", "10187", "38972", "5379", "6357",
"9495", "49566", "32005", "45575", "15890", "13410", "695",
"3752", "36923"), restatedperiodbegin = c("1996-10-01", "1998-01-01",
"2005-01-01", "2012-01-01", "2001-01-01", "2001-01-01", "2003-01-01",
"2003-01-01", "2012-01-01", "2001-07-01", "2001-01-01", "2000-01-01",
"2014-01-01", "2007-01-01", "1997-01-01", "2000-10-01", "1994-01-01",
"2001-01-01", "1999-01-01", "2010-01-01"), restatedperiodended = c("2006-04-01",
"2003-09-30", "2007-09-30", "2012-07-01", "2006-03-31", "2006-09-30",
"2004-12-31", "2006-03-31", "2012-03-31", "2001-09-30", "2005-09-30",
"2004-12-31", "2014-12-31", "2008-12-31", "1998-12-31", "2006-09-30",
"2006-09-30", "2003-09-30", "2002-06-30", "2011-03-31"),
disclosure = c("P