#This is the R script for the second practical lesson of Evolutionary Ecology

# First check which directory (aka "folder") R is using as its working directory
getwd()

# If that is not the directory where your data file is stored, change it.
# Edit the path below so that it points to the folder on YOUR computer.
setwd("D:/Users/User/Documents")

# Edit the name of the file and, if needed, the column separator
# (for example sep = ";" if your file uses semicolons between columns).
# read.csv() is used rather than read.table() because its defaults are meant
# for CSV files: only double quotes delimit text and "#" does not start a
# comment, so an apostrophe or a "#" inside a cell cannot corrupt the rows.
lizards <- read.csv("name_of_the_file_with_your_data.csv", header = TRUE, sep = ",")

# Check the first rows of the file; useful to see if it looks like it did in Excel
head(lizards)


#  1)  WHICH KIND OF DATA DO YOU HAVE?

# Check which kind of variables you have:
# str() lists every column of the data frame together with its type
str(lizards)

# Choose one continuous variable and print its values
# (replace the placeholder after the "$" with the name of your column)
lizards$replace_this_with_your_chosen_variable


# 2) CHECK WHAT IT LOOKS LIKE

# Plot the raw values of your variable (for a numeric variable they are drawn
# against their position in the data); handy for spotting odd values
plot(lizards$your_chosen_variable)

# One boxplot per grouping variable, to compare the groups visually
boxplot(your_chosen_variable ~ Population, data = lizards)
boxplot(your_chosen_variable ~ Sex, data = lizards)
boxplot(your_chosen_variable ~ Group, data = lizards)

# 3) CHECK THE PREREQUISITES FOR THE TEST


# Check whether the distribution is normal

# SHAPIRO-WILK TEST
# The null hypothesis of this test is that the population is normally
# distributed, so a small p-value (e.g. < 0.05) is evidence AGAINST normality.

shapiro.test(lizards$your_variable)

# If it is not normal, try a log transformation;
# this often reduces the skew of right-skewed data.
# (Careful: log() returns -Inf for 0 and NaN for negative values.)

log(lizards$your_variable)


# Store the output of "log()" in a new variable using "<-" (or "=")


# Now check normality again using Shapiro-Wilk, but with the new variable you created

# Is it normal?


# If it is still not normal, you need to see how far from normal it is.
# To compare the observed distribution with the theoretical ones we need a package.
# R has many additional functions that have to be installed before use:
# first we download and install the package from a server, and then we load it
# into R's memory.

# Try to install "fitdistrplus"
install.packages("name of the package")


# Now you need to load the package into the R environment

library("again the name of the package you installed")

# With this package we draw 1000 bootstrap resamples of our data and plot their
# skewness and kurtosis next to those of the ideal (theoretical) distributions,
# to see which distribution our data fits best
descdist(your_variable_here, boot = 1000)

# NOW ANALYSE YOUR VARIABLE
# ARE THERE SIGNIFICANT DIFFERENCES BETWEEN GROUPS?

# We will use ANOVA (if normal) or Kruskal-Wallis (if not normal).
# For that we need a grouping variable with 2 or more degrees of freedom
# (that is, 3 or more groups)
str(lizards)

# Which grouping variable?


# Once you have both variables:
# if it was normal, analyse it with ANOVA; remember that you need to wrap the
# call in "summary()" to see the p-value
aov(Name_of_the_normal_variable ~ Name_of_the_grouping_variable, data = lizards)


# If it was not normal, use the Kruskal-Wallis test
kruskal.test(Name_of_the_variable ~ Name_of_the_grouping_variable, data = lizards)


# Remember that Kruskal-Wallis can also be used with non-continuous data (integers)
# Check the variable "Spots"


# Check other variables


# If you feel adventurous you can try t.test(). It works with only one degree
# of freedom, so use it for grouping variables that have exactly two levels:
# e.g. to test differences between PK and PM (Populations), or between males
# and females

t.test(One_Normal_Numerical_variable ~ Sex, data = lizards)
t.test(One_Normal_Numerical_variable ~ Population, data = lizards)