# This is the R script for the second practical lesson of Evolutionary Ecology.
#
# It is a fill-in-the-blanks worksheet: every placeholder name below
# (e.g. your_chosen_variable, "name of the package") must be replaced with
# a real column or package name before the corresponding line will run.
# Run the script line by line rather than all at once.

# Setup ----

# First check which directory (aka "folder") R is using as its working directory
getwd()

# If that is not the directory where your data is, you need to change it
setwd("D:/Users/User/Documents")

# Edit the name of the file, and the column separator
lizards <- read.table(
  "name_of_the_file_with_your_data.csv",
  header = TRUE,
  sep = ","
)

# Check the first rows of the file, useful to see if it looks like it does in Excel
head(lizards)

# 1) WHICH KIND OF DATA? ----

# Check which kind of variables you have
str(lizards)

# Choose one continuous variable and check it
lizards$replace_this_with_your_chosen_variable

# 2) CHECK HOW IT LOOKS LIKE ----

plot(lizards$your_chosen_variable)
boxplot(your_chosen_variable ~ Population, data = lizards)
boxplot(your_chosen_variable ~ Sex, data = lizards)
boxplot(your_chosen_variable ~ Group, data = lizards)

# 3) CHECK PRE-REQUISITES FOR THE TEST ----

# Check if the distribution is normal
# SHAPIRO-WILK TEST
# (The null hypothesis of this test is that the population is normally
# distributed, so a small p-value means the data are NOT normal)
shapiro.test(lizards$your_variable)

# If it's not normal try a log transformation; this often unskews the data
log(lizards$your_variable)
# Redirect the output of "log()" to a new variable using either = or "<-"
# Now check normality again using Shapiro-Wilk, but with this new variable
# you created
# Is it normal?

# If it's not normal you will need to see how far from normal it is.
# To see the predicted and real distribution we need a package.
# R has many additional functions that need to be installed:
# we first need to download and install the package from a server,
# and then load it into R's memory.
# Try to install "fitdistrplus"
install.packages("name of the package")

# Now you need to load the package into the R environment
library("again the name of the package you installed")

# With this package we draw a skewness-kurtosis plot of our variable;
# boot = 1000 adds 1000 bootstrap resamples of our data so we can see how
# well its distribution fits the ideal (theoretical) distributions
descdist(your_variable_here, boot = 1000)

# 4) ANALYSE YOUR VARIABLE ----

# ARE THERE SIGNIFICANT DIFFERENCES BETWEEN GROUPS?
# We will use ANOVA (if normal) or Kruskal-Wallis (if not normal).
# For that we need a grouping variable with 2 or more degrees of freedom
# (i.e. three or more levels)
str(lizards)
# Which grouping variable?

# Once you have both variables:
# If it was normal analyse it with ANOVA; remember you need to use
# "summary()" on the result to see the p-value
aov(Name_of_the_normal_variable ~ Name_of_the_grouping_variable, data = lizards)

# If it was not normal use the Kruskal-Wallis test
kruskal.test(Name_of_the_variable ~ Name_of_the_grouping_variable, data = lizards)

# Remember that Kruskal-Wallis can also be used with non-continuous data
# (integers)
# Check the variable "Spots"
# Check other variables

# If you feel adventurous you can try t.test; this works with only one
# degree of freedom, so you can check differences in grouping variables
# that have only two levels.
# Use it to test differences between PK and PM (Populations), or between
# males and females
t.test(One_Normal_Numerical_variable ~ Sex, data = lizards)
t.test(One_Normal_Numerical_variable ~ Population, data = lizards)