# - - - - - - Working Script - - - - - - #

#### Initial Setup ####
# Create a new project in RStudio (File > New Project)
# Keep your code clean, organized, and full of comments
# Break your code into easily readable, bookmarked chunks (use four of these: #)
# Read more here: https://course.img.cas.cz/R/material/data-cheatsheets-and-extras/supplement/R_Style_Guides.html
# Always make sure you have the latest version of R and RStudio installed
# Always ensure that your installed packages are up-to-date
# Your script should run error-free from top to bottom at once
# Projects help you organize further by gathering all relevant files in a single place
# .RData is one of the most important files in your working directory
# (if it took you 20 minutes to get your results, .RData will help you retrieve them in 20 seconds)
# Use save.image(".RData") to periodically write your entire R environment to disk.
# If RStudio freezes, this will save you!

rm(list = ls()) ### Clean up RStudio's environment (all your variables and data will be lost)
cat("\f") ### Clean up the console / CTRL+L works too

version
packageStatus()
update.packages() ### Update all installed CRAN packages to the latest available version

# NOTE(review): biocLite()/BiocInstaller is the legacy Bioconductor installer;
# newer Bioconductor releases use BiocManager::install() instead. Confirm which
# R/Bioconductor version this course targets before running these lines.
source("https://bioconductor.org/biocLite.R")
biocLite()
biocLite("BiocUpgrade") ### Upgrade Bioconductor to the latest available version
BiocInstaller::biocValid() ### Are installed packages consistent (neither out-of-date nor too new) with the version of R and Bioconductor in use?

getwd() ### Path to the current working directory
dir() ### List of files in the current directory
sessionInfo() ### R version, platform, and the packages attached/loaded in this session
gtools::loadedPackages() ### List of packages loaded in the current session
biocLite(c("ggplot2", "cttobin/ggthemr", "xcms")) ### Install packages from several sources

# Attach only what is used unqualified throughout the script: the ggplot2
# grammar (ggplot(), aes(), geoms, themes, ggsave()) and the %>% pipe.
# Everything else is called as pkg::fun(), like the rest of this file.
library(ggplot2)
library(magrittr)

#### Load Data ####
mice.data <- openxlsx::read.xlsx(
  "https://course.img.cas.cz/R/material/data-visualization-module/data/mice.xlsx"
)
View(mice.data)
dim(mice.data)
mice.data %>% dim() ### Same as dim(mice.data)
colnames(mice.data)

#### Plot Data ####
# Basic scatterplot; a mapping passed to ggplot() is shared by every layer
ggplot(
  data = mice.data,
  mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point()

# Keep the same scatterplot in `p` so the next examples can add to it
p <- ggplot(
  data = mice.data,
  mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point()

### Adjust the titles on the plot
p +
  labs(
    title = "Mice data scatterplot",
    subtitle = "Resting time and travel distance",
    y = "Distance Travelled",
    x = "Whole arena resting time"
  )

### Another way of defining plot titles
p +
  ggtitle("Mice data scatterplot", subtitle = "Resting time and travel distance") +
  xlab("Whole arena resting time") +
  ylab("Distance Travelled")

### Smoothing line
p + geom_smooth(method = "lm", se = TRUE)
?geom_smooth ### help page

# Two ways of restricting the visible range: coord_cartesian() only zooms the
# view, whereas xlim()/ylim() set scale limits and drop points outside them
# before the smoother is fitted.
p +
  geom_smooth(method = "lm") +
  coord_cartesian(xlim = c(200, 240), ylim = c(1000, 2000)) +
  theme_dark()

p +
  theme_dark() +
  xlim(c(200, 240)) +
  ylim(c(1000, 2000)) +
  geom_smooth(method = "lm")

# Layered plot with per-layer mappings, a ggthemes theme and custom labels.
# Axis labels follow the mapping: x is the resting time, y is the distance.
g <- ggplot(data = mice.data) +
  geom_rug(
    aes(Whole.arena.resting.time.5, Distance.travelled.5),
    color = "red"
  ) +
  geom_point(
    mapping = aes(
      x = Whole.arena.resting.time.5,
      y = Distance.travelled.5,
      col = Tail.length
    )
  ) +
  geom_smooth(
    mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5),
    method = "lm",
    color = "yellow"
  ) +
  ggthemes::theme_solarized_2(light = FALSE) +
  ggthemes::scale_color_solarized("blue") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  labs(
    title = "Distance traveled vs. Resting time",
    subtitle = "Mice measurement data",
    x = "Whole arena resting time",
    y = "Distance traveled 5",
    caption = "Phenotyping data"
  )
g

# Highlight a subset by passing a different data frame to a single layer
small <- dplyr::filter(iris, Sepal.Length > 7)
ggplot(iris) +
  geom_rug(mapping = aes(Sepal.Length, Sepal.Width)) +
  geom_point(aes(Sepal.Length, Sepal.Width, col = Species)) +
  geom_point(
    data = small,
    mapping = aes(Sepal.Length, Sepal.Width),
    col = "white",
    shape = 12
  )

# The complete theme (theme_hc) must come before the theme() tweak;
# otherwise it overrides the rotated x-axis text.
ggplot(
  data = mice.data,
  mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point(col = "steelblue", size = 4, shape = 6) +
  geom_smooth(method = "loess", color = "firebrick") +
  labs(
    title = "Distance traveled vs. Resting time",
    subtitle = "Mice measurement data",
    x = "Whole arena resting time 5",
    y = "Distance traveled 5",
    caption = "Phenotyping data"
  ) +
  ggthemes::theme_hc(bgcolor = "darkunica") +
  ggthemes::scale_fill_hc("darkunica") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

# Colour by sex: the legend exposes an inconsistent label ("Male " with a
# trailing space), which is cleaned up below before plotting again.
ggplot(
  data = mice.data,
  mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point(aes(col = sex), size = 3) +
  geom_smooth(method = "loess", color = "firebrick") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "top"
  ) +
  scale_colour_brewer(palette = "Set2")

unique(mice.data$sex)
mice.data$sex[mice.data$sex == "Male "] <- "Male"
mice.data$sex %>% unique()

ggplot(
  data = mice.data,
  mapping = aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point(aes(col = sex), size = 3) +
  geom_smooth(method = "loess", color = "firebrick") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1),
    legend.position = "top"
  ) +
  scale_colour_brewer(palette = "Set2")

# Rows to annotate; ungroup afterwards so no grouping leaks into later code
select.mice <- mice.data %>%
  dplyr::group_by(cohort) %>%
  dplyr::filter(Distance.travelled.5 > 2000) %>%
  dplyr::ungroup()

# Plain labels and repelled labels are drawn together for comparison
ggthemr::ggthemr("earth")
ggplot(
  mice.data,
  aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_point(mapping = aes(color = Tail.length)) +
  geom_label(mapping = aes(label = Gait), data = select.mice) +
  coord_cartesian(xlim = c(180, 350), ylim = c(1200, 2300)) +
  ggrepel::geom_label_repel(
    mapping = aes(label = Gait),
    data = select.mice,
    segment.color = "white"
  )
ggthemr::ggthemr_reset()

# Boxplot before and after converting weight_w13 to numeric
ggplot(data = mice.data) +
  geom_boxplot(mapping = aes(Gait, weight_w13))

summary(mice.data$weight_w13)
mice.data$weight_w13 %>% class() #### same as class(mice.data$weight_w13)
Hmisc::describe(mice.data$weight_w13)

# Values that cannot be parsed as numbers become NA (with a warning)
mice.data$weight_w13 <- as.numeric(mice.data$weight_w13)
# mice.data$weight_w13 %<>% as.numeric # Usage of magrittr compound assignment pipe-operator

ggplot(data = mice.data) +
  geom_boxplot(mapping = aes(Gait, weight_w13))

ggplot(mice.data) +
  geom_boxplot(mapping = aes(cohort, weight_w13, col = sex)) +
  theme(axis.text.x = element_text(angle = 90)) +
  ylab(label = "Weight")

ggplot(mice.data) +
  geom_boxplot(mapping = aes(Gait, weight_w13, fill = sex)) +
  geom_jitter(aes(Gait, weight_w13, col = sex), width = 0.3) +
  ylab("Weight (w13)") +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal()

# Summary helper for stat_summary(fun.data = ...).
# x: numeric vector of the y values in one group.
# Returns a one-row data frame holding the group mean as both the y position
# and the text label. Missing values are ignored for both columns, so the
# label and its position always agree.
fun_mean <- function(x) {
  avg <- mean(x, na.rm = TRUE)
  data.frame(y = avg, label = avg)
}

# NOTE(review): `fun.y` is the older argument name; recent ggplot2 releases
# call it `fun` and warn about `fun.y`. Kept as-is for the ggplot2 version
# this script was written against - confirm before renaming.
ggplot(mice.data, mapping = aes(Gait, weight_w13)) +
  geom_boxplot() +
  ylab("Weight (week 13)") +
  scale_fill_brewer(palette = "Dark2") +
  theme_minimal() +
  stat_summary(fun.y = mean, geom = "point", color = "darkred", size = 3) +
  stat_summary(fun.data = fun_mean, geom = "text", vjust = 1.7)

ggplot(mice.data) +
  geom_bar(aes(Tail.length), color = "black") +
  xlab(label = "Tail length") +
  ylab(label = "Count")

ggplot(mice.data) +
  geom_histogram(
    aes(weight_w13, fill = Gait),
    color = "black",
    position = "dodge"
  )

#### Histogram with additional density plot ####
# NOTE(review): `..density..` is the older notation; recent ggplot2 releases
# prefer after_stat(density). Kept as-is - confirm the target ggplot2 version.
density_hist_mice <- mice.data %>%
  ggplot(data = ., aes(x = weight_w13, color = sex, fill = sex)) +
  geom_histogram(
    mapping = aes(y = ..density..),
    position = "identity",
    alpha = 0.5
  ) +
  geom_density(alpha = 0.6) +
  scale_color_manual(values = c("#999999", "#E69F00")) +
  scale_fill_manual(values = c("#999999", "#E69F00")) +
  labs(
    title = "Male vs. Female (Weight)",
    x = "Weight (w13)",
    y = "Density"
  ) +
  theme_classic()

#### Save Plots ####
# NOTE(review): the file name is spelled "Denisty"; left unchanged in case
# other course material refers to the existing file name.
ggsave(
  filename = "Denisty_Histogram.png",
  plot = density_hist_mice,
  device = "png",
  width = 15,
  height = 7,
  units = "in",
  dpi = 1200
)

#### Interactive Plots ####
plotly::ggplotly(g)

#### Facets ####
ggplot(
  mice.data,
  aes(x = Whole.arena.resting.time.5, y = Distance.travelled.5)
) +
  geom_line() +
  facet_wrap(~cohort, ncol = 3)

# Reshape the five glucose columns to long format, recover the time point
# from the digits in each column name, average per strain/sex/time/cohort,
# and plot one panel per sex x cohort combination.
# NOTE(review): tidyr::gather() is superseded by pivot_longer(); kept for
# compatibility with the tidyr version this script was written against.
mice.data %>%
  dplyr::filter(cohort %in% c("c015", "c016", "c017", "c018", "c020")) %>%
  dplyr::select(
    c(
      "sex", "cohort", "strain",
      "Glucose.conc.0", "Glucose.conc.15", "Glucose.conc.30",
      "Glucose.conc.60", "Glucose.conc.120"
    )
  ) %>%
  tidyr::gather(key = "time_point", value = "Glc_c", 4:8) %>%
  dplyr::mutate(time = as.numeric(stringr::str_extract(time_point, "\\d+"))) %>%
  dplyr::mutate(Glc_c = as.numeric(Glc_c)) %>%
  dplyr::group_by(strain, sex, time, cohort) %>%
  dplyr::summarise(Glc_avg = mean(Glc_c)) %>%
  ggplot(aes(x = time, y = Glc_avg, color = strain)) +
  geom_line() +
  theme_bw() +
  ylab("Glucose concentration [mmol/L]") +
  facet_grid(sex ~ cohort)