<- 10
x x
[1] 10
The following are common useful commands used in R, with examples of their use.
<-
/ =
- save a value as an object. On Mac, the keyboard shortcut is OPTION
+-
. Windows can be formatted so that the shortcut is ALT
+-
.<- 10
x x
[1] 10
|>
- “pipe” a command or output into another command. You can use the shortcut CTRL
+SHIFT
+M
on Windows or Mac.# make repeatable
set.seed(930)
# random string
<- rnorm(20)
x
|>
x # pass to summary
summary() |>
# pass summary through round
round(2)
Min. 1st Qu. Median Mean 3rd Qu. Max.
-1.94 -0.48 -0.06 -0.05 0.36 1.80
->
- save a value as an object at the end of a pipeline# same pipeline as previous, but more sequential
# get random values
rnorm(20) |>
# pass to summary
summary() |>
# pass summary through round
round(2) ->
# save as x
x
x
Min. 1st Qu. Median Mean 3rd Qu. Max.
-2.60 -0.77 -0.24 -0.18 0.57 1.73
c
- concatenate, place two values together<- c(10,11)
x x
[1] 10 11
For these examples, we will create a random vector of number to demonstrate how they work.
<- rnorm(1000) x
mean
- get the mean / average of a set of datamean(x)
[1] 0.05326273
The following functions are those developed for this class or adapted from code posted to sources like StackOverflow.
For these basic stats, we are using the following example data:
### EXAMPLE DATA ###
<- c(1,2,3,5,5,4,6,8,7,9,5) x
# Based on Statology function
# define function to calculate mode
# works on vectors of data
<- function(x) {
find_mode # get unique values from vector
<- unique(x)
u # count number of occurrences for each value
<- tabulate(match(x, u))
tab
# if no mode, say so
if(length(x)==length(u[tab == max(tab)])){
print("No mode.")
else{
}# return the value with the highest count
== max(tab)]
u[tab
}
}
find_mode(x)
[1] 5
<- function(x){
se <- length(x) # calculate n
n <- sd(x) # calculate standard deviation
s <- s/sqrt(n)
se_val return(se_val)
}
se(x)
[1] 0.7385489
<- function(x){
cv <- sd(x)
sigma <- mean(x)
mu <- sigma/mu*100
val return(val)
}
cv(x)
[1] 48.98979
Remember - in the \(Z\)-score code below, if no \(n\) is specified, then it will default to \(n = 1\).
<- function(xbar, mu, sd.x, n = 1){
zscore <- (xbar - mu)/(sd.x/sqrt(n))
z return(z)
}
zscore(xbar = 62,
mu = 65,
sd.x = 3.5,
n = 5)
[1] -1.91663
The following example data are going to be used to illustrate these functions.
#### EXAMPLE DATA ####
set.seed(8675309)
for(i in 1:4){
<- rnorm(10)
x if(i == 1){
<- rnorm(10, mean = 2)
x <- x |> as.data.frame()
data colnames(data) <- "Response"
$Explanatory <- paste0("x",i)
dataelse{
}<- x |> as.data.frame()
newdat colnames(newdat) <- "Response"
$Explanatory <- paste0("x",i)
newdat<- rbind(data,newdat)
data
}
}
# split into "typical" table
<- NULL
expanded_data $x1 <- data$Response[which(data$Explanatory=="x1")]
expanded_data$x2 <- data$Response[which(data$Explanatory=="x2")]
expanded_data$x3 <- data$Response[which(data$Explanatory=="x3")]
expanded_data$x4 <- data$Response[which(data$Explanatory=="x4")]
expanded_data
<- expanded_data |>
expanded_data as.data.frame()
The above is a one-way ANOVA. As a reminder, we would calculate the test as follows:
# pivot longer does not work for one-way ANOVA, requires blocking factor
# can rbind things with same colnames to make longer
<- aov(Response ~ Explanatory, data)
example_aov
summary(example_aov)
Df Sum Sq Mean Sq F value Pr(>F)
Explanatory 3 23.40 7.801 11.54 1.89e-05 ***
Residuals 36 24.33 0.676
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Above, we can see a significant result of the ANOVA. We can follow this up with a Tukey test. This requires the package agricolae
! Check the ANOVA pages, however, as not all ANOVA can use this agricolae
shortcut method.
<- HSD.test(example_aov,
example_tukey # what to group by?
"Explanatory",
# significance level?
alpha = 0.05,
# are data unbalanced
unbalanced = FALSE,
# show answer?
console = TRUE)
Study: example_aov ~ "Explanatory"
HSD Test for Response
Mean Square Error: 0.6758192
Explanatory, means
Response std r se Min Max Q25 Q50
x1 1.7620153 1.0505466 10 0.2599652 0.4504476 3.972459 1.1175485 1.44911720
x2 0.2841495 0.7532422 10 0.2599652 -0.4729986 1.985826 -0.3497379 0.21347543
x3 0.2197337 0.7019368 10 0.2599652 -0.6150452 1.574903 -0.2436023 -0.04493909
x4 -0.2890524 0.7345336 10 0.2599652 -1.9769014 0.684072 -0.5394534 -0.07741642
Q75
x1 2.07491533
x2 0.64579865
x3 0.53544151
x4 0.04323417
Alpha: 0.05 ; DF Error: 36
Critical Value of Studentized Range: 3.808798
Minimun Significant Difference: 0.9901551
Treatments with the same letter are not significantly different.
Response groups
x1 1.7620153 a
x2 0.2841495 b
x3 0.2197337 b
x4 -0.2890524 b
Remember - you need to change "Explanatory"
to your explanatory variable (in quotes!) and you need to change Response
to your response column (no quotes!). The following requires plyr
to work, but the function itself should call up plyr
if you do not yet have it loaded.
# summarize by group
<- function(data, explanatory){
summary_data require(plyr)
ddply(data, paste(explanatory), summarise,
N = length(Response),
mean = mean(Response),
sd = sd(Response),
se = sd / sqrt(N))
}
<- summary_data(data = data, explanatory = "Explanatory") example_summary
Loading required package: plyr
------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
------------------------------------------------------------------------------
Attaching package: 'plyr'
The following objects are masked from 'package:dplyr':
arrange, count, desc, failwith, id, mutate, rename, summarise,
summarize
The following object is masked from 'package:purrr':
compact
example_summary
Explanatory N mean sd se
1 x1 10 1.7620153 1.0505466 0.3322120
2 x2 10 0.2841495 0.7532422 0.2381961
3 x3 10 0.2197337 0.7019368 0.2219719
4 x4 10 -0.2890524 0.7345336 0.2322799
This command requires a Tukey HSD object from agricolae
. You can manually create a table like this for some other scenarios; see relevant pages for documentation.
# note first group must be EXACT MATCH to your summary_data object
# groups are saved in the Tukey object
# this is true for Tukey later as well
# the following is a function that will make the significant label table
<- function(tukey_test, group_name){
sig.label.maker <- tukey_test$groups |>
sig.labels # convert to a data.frame
as.data.frame() |>
# create a new column - place rownames into the column
# converts to a format better for ggplot
mutate(Explanatorys = rownames(tukey_test$groups)) |>
# rename column to prevent confusion
# specify dplyr; default function may be from plyr and not work
::rename(Significance = groups)
dplyrcolnames(sig.labels)[which(colnames(sig.labels) == "Explanatorys")] <- group_name
return(sig.labels)
}
# Function requires explanatory groups in quotes
<- sig.label.maker(example_tukey, "Explanatory")
sig_labels
sig_labels
Response Significance Explanatory
x1 1.7620153 a x1
x2 0.2841495 b x2
x3 0.2197337 b x3
x4 -0.2890524 b x4
The following function plots ANOVAS if you have a summary_data
object and a sig_labels
object, as shown above. This does not work on ANOVA with interactive components.
<- function(summary_data, explanatory,
anova_plotter
response, sig_labels,y_lim=NA, label_height=NA,
y_lab=NA, x_lab=NA){
require(tidyverse)
<- summary_data[,c(explanatory, response, "se")]
plot_data_1 <- sig_labels[,c(explanatory,"Significance")]
plot_data_2
colnames(plot_data_1) <- c("explanatory", "response", "se")
colnames(plot_data_2) <- c("explanatory", "Significance")
<- plot_data_1 |>
plot_data full_join(plot_data_2, by = "explanatory")
if(is.na(y_lim)){
if(min(plot_data$response) < 0){
<- c(min(plot_data$response) -
y_lim 4*max(plot_data$se),
max(plot_data$response) +
4*max(plot_data$se))
else{
}<- c(0,max(plot_data$response) +
y_lim 4*max(plot_data$se))
}
}if(is.na(label_height)){label_height <- 0.25*max(y_lim)}
if(is.na(y_lab)){y_lab <- "Response"}
if(is.na(x_lab)){x_lab <- "Treatment"}
<- ggplot(plot_data,
plot_1 aes(x = explanatory, y = response)) +
geom_point() +
geom_errorbar(data = plot_data,
aes(ymin = response - 2*se,
ymax = response + 2*se,
width = 0.1)) +
ylim(y_lim) +
theme_classic() +
theme(legend.position = "none",
axis.text.x = element_text(angle = 90, vjust = 0.5, size = 5)) +
geom_text(data = plot_data,
# make bold
fontface = "bold",
# define where labels should go
aes(x = explanatory,
# define height of label
y = label_height,
# what are the labels?
label = paste0(Significance))) +
xlab(x_lab) +
ylab(y_lab)
print(plot_1)
}
anova_plotter(summary_data = example_summary,
explanatory = "Explanatory",
response = "mean", # from summary_data table! What is to be plotted
sig_labels = sig_labels)
Note that in the above, the default label height is not working for us. We can adjust this with label_height
.
anova_plotter(summary_data = example_summary,
explanatory = "Explanatory",
response = "mean", # from summary_data table! What is to be plotted
sig_labels = sig_labels,
label_height = -1)
Much better!