##Start from an empty workspace
##NOTE: this deletes every object in the current environment, hidden (dot-prefixed) ones included
rm(list = ls(all.names = TRUE))
##Base folder for the input files; the trailing separator is part of the string
##because the file names below are appended to it directly
path <- "C:\\R_Examples\\"
#************************************************************************************
# Example 1 - Selecting Y12 workers
# FAC code 6 is for Y12
# Columns fac1-fac10 are checked to determine whether a worker was ever employed at Y12
#************************************************************************************
##Read in ORISEWDS_d1 (comma-separated, first row holds the column names)
inputFile <- paste0(path, "ORISEWDS_d1.csv")
dataOriginal <- read.table(inputFile, header=TRUE, sep=",")
##Subset the data to get all Y12 workers:
##a row qualifies when any of the columns fac1..fac10 equals "6"
facColumns <- paste0("fac", 1:10)
everAtY12 <- Reduce(`|`, lapply(dataOriginal[facColumns],
                                function(fac) fac == "6"))
##which() discards rows whose flag is NA, the same way subset() does
y12Workers <- dataOriginal[which(everAtY12), , drop = FALSE]
##Subset all Y12 workers to get only the males
y12Males <- y12Workers[which(y12Workers[, "sex"] == "M"), , drop = FALSE]
#************************************************************************************
# Example 2 - Selecting vital status for Y12 males
# Import the Master vital status file
#************************************************************************************
##Load ORISEWDS_d6 (comma-separated, first row holds the column names)
inputFile <- paste0(path, "ORISEWDS_d6.csv")
dataVital <- read.table(file = inputFile, sep = ",", header = TRUE)
##Left join on "id": every row of y12Males is kept, and rows with no
##matching id in dataVital get NA in the columns coming from dataVital
y12MalesVital <- merge(x = y12Males, y = dataVital, by = "id", all.x = TRUE)
#************************************************************************************
# Example 3 - Selecting deceased males at Y12 and their cause of death
# Import the Death file
#************************************************************************************
##Read in ORISEWDS_d7 (comma-separated, first row holds the column names)
##The folder and file name are joined with no separator; joining them with ","
##would insert a comma into the path and point at a file that does not exist
inputFile <- paste0(path, "ORISEWDS_d7.csv")
dataDeath <- read.table(inputFile, header=TRUE, sep=",")
##Merge the two datasets
##Inner join on "id": only Y12 males that have a row in dataDeath are kept
y12MalesDeath <- merge(y12Males, dataDeath, by="id")
#************************************************************************************
# Example 4 - Employment data for all males who were hired at
# Y12 between 1980 and 2000
# Import the Employment data
#************************************************************************************
##Read in ORISEWDS_d2 (comma-separated, first row holds the column names)
inputFile <- paste0(path, "ORISEWDS_d2.csv")
dataEmployment <- read.csv(inputFile, header=TRUE, sep=",")
##Merge the two datasets
##Inner join on "id": only Y12 males that have employment rows are kept
y12MalesEmploy <- merge(y12Males, dataEmployment, by="id")
##Extract the hire year, taken as the last four characters of hiredate
##The placeholder "XXXXXXXXXX" and missing dates have no usable year (NA)
hireDates <- as.character(y12MalesEmploy[, "hiredate"])
hasHireDate <- !is.na(hireDates) & hireDates != "XXXXXXXXXX"
hireYears <- rep(NA_real_, length(hireDates))
knownDates <- hireDates[hasHireDate]
hireYears[hasHireDate] <- as.numeric(substr(knownDates,
                                            nchar(knownDates) - 3,
                                            nchar(knownDates)))
##Keep only rows hired in 1980-2000; rows without a usable year are dropped
##A logical mask is used instead of negative row indices so that the result
##is still correct when no row (or every row) has to be removed
hiredInRange <- !is.na(hireYears) & hireYears >= 1980 & hireYears <= 2000
y12MalesEmployYears <- y12MalesEmploy[hiredInRange, , drop = FALSE]
#************************************************************************************
# Example 5 - Y12 males ever employed between 1980-2000 and their vital status
#************************************************************************************
##Pull out the unique IDs, maintaining them as a dataframe for the merge
y12MalesEmployUnique <- data.frame(id = unique(y12MalesEmployYears[, "id"]))
##Merge the two datasets
##Left join on "id": every unique ID is kept, unmatched ones get NA columns
y12MalesEmployVital <- merge(y12MalesEmployUnique, y12MalesVital, by = "id", all.x = TRUE)
##Drop rows with NA status
##Every row is checked, the first one included, and a logical mask is used
##so that nothing is lost when there are no NA rows to drop
hasStatus <- !is.na(y12MalesEmployVital[, "status"])
y12MalesEmployVital <- y12MalesEmployVital[hasStatus, , drop = FALSE]