##### Code for NutNet PI to use before submitting data ########################
###############################################################################
# Instructions:
# (1) Save all data worksheets from template as individual *.csv files with
#     the following format:
#       yoursitename-sites-2009.csv (e.g. hart-sites-2009)
#       yoursitename-plan-2009.csv  (e.g. hart-plan-2009)
#       yoursitename-cover-2009.csv (e.g. hart-cover-2009)
#       yoursitename-taxa-2009.csv  (e.g. hart-taxa-2009)
#       yoursitename-mass-2009.csv  (e.g. hart-mass-2009)
#       yoursitename-par-2009.csv   (e.g. hart-par-2009)
# (2) Set working directory - note that Windows uses backslashes in file
#     paths but R expects forward slashes. Change the path to R format.
# (3) Run code - if you get errors before the code finishes, you may have to
#     check the file names and column headings
# (4) Look at output files (pdf and .csv). Check for NAs, blanks, typos,
#     boxplot outliers, misspellings
# (5) Fix mistakes in original *.csv file
# (6) Rerun code and recheck plots and output files.
# (7) Save corrected *.csv data files and send to me (Lydia)
###############################################################################

################### Set working directory ##########
# setwd("yourfilepath")  # insert entire file path to the directory containing
#                        # this code and data files
# For example:
#   windows:      setwd("~/NutNet/Data/QAQC NutNet")
#   mac and unix: setwd("~/NutNet/Data/QAQC NutNet")

# Display the working directory path on the top banner of the program.
# setWindowTitle() is only provided by the Windows build of R, so it is
# skipped on other platforms instead of stopping the script with an error.
if (.Platform$OS.type == "windows") {
  utils::setWindowTitle(paste("=", base::getwd()))
}

## Replace all instances of "yoursitename" in the code with your actual
## sitename
### windows: Use "Control R" to find the generic term "yoursitename" and
###          replace with your actual site name
### mac: Use "Command F"

# Clear all existing objects from the workspace
rm(list = ls())

###### Read in site data ############################
tmp.site <- read.csv("yoursitename-sites-2009.csv", header = TRUE,
                     strip.white = TRUE)
names(tmp.site) <- tolower(names(tmp.site))
# confirm elevation is in meters
tmp.site$elevation.meters.ck <- TRUE

###### Read in plan, cover and taxa data ##########################
tmp.plan <- read.csv("yoursitename-plan-2009.csv", header = TRUE,
                     strip.white = TRUE)
names(tmp.plan) <- tolower(names(tmp.plan))
tmp.plan <- tmp.plan[c("block", "plot", "subplot", "n", "p", "k", "exclose",
                       "treat_other_name", "treat_other_level")]

tmp.cov <- read.csv("yoursitename-cover-2009.csv", header = TRUE,
                    strip.white = TRUE)
names(tmp.cov) <- tolower(names(tmp.cov))
# Go through character first: if a typo made read.csv() load the column as a
# factor, as.numeric() alone would return the level codes, not the values.
# Entries that are not numbers become NA (with a warning) and are dropped by
# the missing-data filter below.
tmp.cov$cover <- as.numeric(as.character(tmp.cov$cover))
tmp.cov <- tmp.cov[c("taxa", "site", "block", "plot", "subplot", "cover",
                     "date")]
# Check for values that are too high
tmp.cov$cov.chk[tmp.cov$cover > 100] <- "ERROR"

tmp.tax <- read.csv("yoursitename-taxa-2009.csv", header = TRUE,
                    strip.white = TRUE)
# NOTE(review): unlike the other tables, the taxa column names are not
# lower-cased before selection and "Kingdom" is capitalised - presumably this
# matches the template headings; confirm.
tmp.tax <- tmp.tax[c("taxa", "Kingdom", "division", "family", "genus",
                     "species", "variety", "lifeform", "lifespan",
                     "provenance")]
# Write full name of taxa to check for spelling
tmp.tax$full.name <- as.factor(paste(tmp.tax$genus, tmp.tax$species,
                                     sep = " "))
# Check for invalid categorical data: anything outside the allowed lifespan
# categories is replaced by "ERROR"
tmp.tax$lifespan <- as.character(tmp.tax$lifespan)
tmp.tax$lifespan <- ifelse(
  tmp.tax$lifespan %in% c("Annual", "Perennial", "Biennial", "Indeterminate"),
  tmp.tax$lifespan,
  "ERROR"
)

# Merge cover and taxa data (every cover row is kept), then attach the
# treatment plan by block/plot/subplot
tmp.1 <- merge(tmp.cov, tmp.tax, by = "taxa", all.x = TRUE)
cover <- merge(tmp.1, tmp.plan, by = c("block", "plot", "subplot"))

# Check for essential missing data: drop rows with a missing plot, site,
# cover or taxa, and rows whose taxa is blank
cover <- cover[
  complete.cases(cover[c("plot", "site", "cover", "taxa")]) &
    !(cover$taxa %in% c("", " ")),
]
# print() so the summary is also shown when the script is run via source()
print(summary(cover))

###### Read in biomass data #########################
tmp.mass <- read.csv("yoursitename-mass-2009.csv", header = TRUE,
                     strip.white = TRUE)
# Lower-case the headings before selecting columns by their lower-case names
# (same order of operations as for the plan and cover tables)
names(tmp.mass) <- tolower(names(tmp.mass))
tmp.mass <- tmp.mass[c("taxa", "site", "block", "plot", "subplot", "mass",
                       "date")]
# Units confirmed by site PI as g/0.2 m^2
tmp.mass$unit.ck <- TRUE
# multiply biomass by 5 to get g/m^2.
tmp.mass$mass <- tmp.mass$mass * 5  # g per 0.2 m^2 -> g per m^2
mass <- tmp.mass

# Check for essential missing data: drop rows with a missing plot, site,
# mass or taxa, and rows whose taxa is blank
mass <- mass[
  complete.cases(mass[c("plot", "site", "mass", "taxa")]) &
    !(mass$taxa %in% c("", " ")),
]
# print() so the summary is also shown when the script is run via source()
print(summary(mass))

##### Read in PAR data ###########################
tmp.par <- read.csv("yoursitename-par-2009.csv", header = TRUE,
                    strip.white = TRUE, na.strings = "NA")
names(tmp.par) <- tolower(names(tmp.par))
tmp.par$year <- 2009
tmp.par$block <- as.factor(tmp.par$block)
tmp.par$plot <- as.factor(tmp.par$plot)
tmp.par$subplot <- as.factor(tmp.par$subplot)
#### check units are mmol m-2 s-1
tmp.par$unit.ck <- TRUE
#### check for values that are too high
tmp.par$par.chk[tmp.par$par > 13000] <- "ERROR"
# NOTE: this data frame shares its name with the graphics function par();
# the name is kept because later code refers to it.
par <- tmp.par

# Treat plot and block as grouping factors for the plots below
mass$plot <- as.factor(mass$plot)
mass$block <- as.factor(mass$block)
cover$plot <- as.factor(cover$plot)
cover$block <- as.factor(cover$block)

##### output data to look at distribution for outliers ##########
pdf(file = "nutnetdata-qaqc-plots2.pdf")
# Lay the boxplots out on a 3 x 2 grid per page. The graphics function is
# called with its namespace because `par` is also the PAR data frame.
graphics::par(mfrow = c(3, 2))
boxplot(cover$cover, main = "Testing cover data for outliers",
        ylab = "Cover (%)", varwidth = FALSE, notch = FALSE)
boxplot(cover$cover ~ cover$block, ylab = "Cover (%)", varwidth = FALSE,
        notch = FALSE, main = "Cover: check for outliers; \n By Block")
boxplot(mass$mass, main = "Testing biomass data for outliers",
        ylab = "Biomass (grams/m^2)", varwidth = FALSE, notch = FALSE)
boxplot(mass$mass ~ mass$block, ylab = "Biomass (grams/m^2)",
        varwidth = FALSE, notch = FALSE,
        main = "Mass: check for outliers; \n By Block")
boxplot(par$par, main = "Testing par data for outliers",
        ylab = "Par(mmol m-2 s-1)", varwidth = FALSE, notch = FALSE)
# NOTE(review): the statement below is cut off in the visible source (the
# title string is never closed); it is reproduced unchanged, not guessed at.
boxplot(par$par~par$location,ylab='Par(mmol m-2 s-1)', varwidth=F, notch=F, main="PAR: check for outliers; max PAR < ~2000;\n mean(ambient>ground); range(ambient< outliers in data