## ---- echo = FALSE------------------------------------------------------------
# Hidden setup chunk: knitr output options and the packages used below.
# dplyr is attached quietly so its startup/masking messages stay out of the
# rendered output.
library(knitr)
opts_chunk$set(comment = "")
suppressPackageStartupMessages(library(dplyr))
library(jhur)

## -----------------------------------------------------------------------------
library(tidyverse)

## -----------------------------------------------------------------------------
# Printing the bare name shows which `filter` is found first on the search path.
filter

## -----------------------------------------------------------------------------
# The `::` prefix reaches the stats version explicitly.
head(stats::filter, 2)

## -----------------------------------------------------------------------------
data(jhu_cars)
df = jhu_cars # df is a copy of jhu_cars
head(df) # changing df does **not** change jhu_cars

## -----------------------------------------------------------------------------
tbl = as_tibble(df)
head(tbl)

## -----------------------------------------------------------------------------
# Same data printed as a data.frame and as a tibble.
head(mtcars, 2)
head(as_tibble(mtcars), 2)

## -----------------------------------------------------------------------------
df = dplyr::rename(df, MPG = mpg)
head(df)
df = rename(df, mpg = MPG) # reset - don't need :: b/c not masked

## -----------------------------------------------------------------------------
# Apply `toupper` to every column name.
df_upper = dplyr::rename_all(df, toupper)
head(df_upper)

## -----------------------------------------------------------------------------
df$carb

## -----------------------------------------------------------------------------
select(df, mpg)

## -----------------------------------------------------------------------------
pull(select(df, mpg))

## -----------------------------------------------------------------------------
select(df, mpg, cyl)
select(df, starts_with("c"))

## ---- eval = FALSE------------------------------------------------------------
## ??tidyselect::select_helpers

## ---- eval = FALSE------------------------------------------------------------
## one_of()
## last_col()
## ends_with()
## contains() # like searching
## matches() # Matches a regular expression - cover later

## -----------------------------------------------------------------------------
filter(df, mpg > 20 | mpg < 14)

## -----------------------------------------------------------------------------
# Two spellings of the same condition: `&` and comma-separated arguments.
filter(df, mpg > 20 & cyl == 4)
filter(df, mpg > 20, cyl == 4)

## -----------------------------------------------------------------------------
filter(df, mpg > 20 | cyl == 4)

## -----------------------------------------------------------------------------
# Nested call: filter rows first, then keep two columns.
select(filter(df, mpg > 20 & cyl == 4), cyl, hp)

## -----------------------------------------------------------------------------
# Same steps with an intermediate object.
df2 = filter(df, mpg > 20 & cyl == 4)
df2 = select(df2, cyl, hp)

## -----------------------------------------------------------------------------
# Same steps written as a pipeline.
df %>%
  filter(mpg > 20 & cyl == 4) %>%
  select(cyl, hp)

## -----------------------------------------------------------------------------
df$newcol = df$wt / 2.2
head(df, 3)

## -----------------------------------------------------------------------------
df = mutate(df, newcol = wt / 2.2)

## ---- echo = FALSE------------------------------------------------------------
# Hidden chunk: redo the assignment and show the first two rows.
df = mutate(df, newcol = wt / 2.2)
print(head(df, 2))

## -----------------------------------------------------------------------------
# Three-level category built from nested ifelse() calls.
df = mutate(
  df,
  disp_cat = ifelse(
    disp <= 200,
    "Low",
    ifelse(disp <= 400, "Medium", "High")
  )
)
head(df$disp_cat)

## -----------------------------------------------------------------------------
# The same categories expressed with case_when().
df = mutate(
  df,
  disp_cat2 = case_when(
    disp <= 200 ~ "Low",
    disp > 200 & disp <= 400 ~ "Medium",
    disp > 400 ~ "High"
  )
)
head(df$disp_cat2)

## ---- eval = FALSE------------------------------------------------------------
## df$newcol = NULL

## ---- eval = FALSE------------------------------------------------------------
## select(df, -newcol)

## ---- echo = FALSE------------------------------------------------------------
head(select(df, -newcol))

## ---- eval = FALSE------------------------------------------------------------
## select(df, -one_of("newcol", "drat"))

## ---- echo = FALSE------------------------------------------------------------
head(select(df, -one_of("newcol", "drat")))

## ---- eval = FALSE------------------------------------------------------------
## select(df, newcol, everything())

## ---- echo = FALSE------------------------------------------------------------
head(select(df, newcol, everything()))

## ---- eval = FALSE------------------------------------------------------------
## select(df, -newcol, everything(), newcol)

## ---- echo = FALSE------------------------------------------------------------
head(select(df, -newcol, everything(), newcol))

## -----------------------------------------------------------------------------
arrange(df, mpg)

## -----------------------------------------------------------------------------
arrange(df, desc(mpg))

## -----------------------------------------------------------------------------
arrange(df, mpg, desc(hp))

## -----------------------------------------------------------------------------
# transmute() keeps only the columns named in the call.
transmute(df, newcol2 = wt / 2.2, mpg, hp)

## -----------------------------------------------------------------------------
colnames(df) # just prints
# Look the three columns up by name instead of assuming positions 1:3. A
# positional rename followed by the reset below would relabel the wrong
# columns (and leave a duplicate "disp") whenever something else comes first.
# NOTE(review): jhu_cars' column order is not visible here - presumably it has
# a leading car-name column; confirm.
col_idx = match(c("mpg", "cyl", "disp"), colnames(df))
colnames(df)[col_idx] = c("MPG", "CYL", "DISP") # reassigns
head(df)
colnames(df)[col_idx] = c("mpg", "cyl", "disp") # reset - just to keep consistent

## -----------------------------------------------------------------------------
# Rename one column by matching on its current name.
cn = colnames(df)
cn[cn == "drat"] = "DRAT"
colnames(df) = cn
head(df)
colnames(df)[colnames(df) == "DRAT"] = "drat" # reset

## -----------------------------------------------------------------------------
# Rows 1 and 3, all columns.
df[c(1, 3), ]

## -----------------------------------------------------------------------------
# NOTE(review): position 11 is presumably meant to be the same column as
# "carb" - confirm against colnames(df).
df[, 11]
df[, "carb"]

## -----------------------------------------------------------------------------
# Single-column extraction from a data.frame versus a tibble, and the effect
# of drop = FALSE.
# NOTE(review): pairing `[, 1]` with `[, "mpg"]` assumes mpg is the first
# column - confirm.
df[, 1]
tbl[, 1]
tbl[, "mpg"]
df[, 1, drop = FALSE]

## -----------------------------------------------------------------------------
df[, c("mpg", "cyl")]

## -----------------------------------------------------------------------------
# Move row names into a regular column named "car".
head(rownames_to_column(mtcars, var = "car"), 2)
head(as_tibble(rownames_to_column(mtcars, var = "car")), 2)