Data used

Bike Lanes Dataset: BikeBaltimore is the Department of Transportation’s bike program. The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms

You can download it as a CSV into your current working directory. Note that it's also available at: http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv

library(readr)
library(tidyverse)
## ── Attaching packages ────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.1          ✔ purrr   0.3.2     
## ✔ tibble  2.1.1.9000     ✔ dplyr   0.8.0.1   
## ✔ tidyr   0.8.3          ✔ stringr 1.4.0     
## ✔ ggplot2 3.1.1          ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(jhur)

# Read the bike lanes data straight from the course website.
bike <- read_csv("http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv")
## Parsed with column specification:
## cols(
##   subType = col_character(),
##   name = col_character(),
##   block = col_character(),
##   type = col_character(),
##   numLanes = col_double(),
##   project = col_character(),
##   route = col_character(),
##   length = col_double(),
##   dateInstalled = col_double()
## )

or use

# Same data set, loaded through the jhur helper instead of the URL.
library(jhur)
bike <- read_bike()
## Parsed with column specification:
## cols(
##   subType = col_character(),
##   name = col_character(),
##   block = col_character(),
##   type = col_character(),
##   numLanes = col_double(),
##   project = col_character(),
##   route = col_character(),
##   length = col_double(),
##   dateInstalled = col_double()
## )

Part 1

  1. Get all the different bike lane types from the type column. Use sort(unique()). Assign this to an object btypes. Type dput(btypes).
# Collect the distinct values of the type column, sorted alphabetically.
btypes = sort(unique(bike$type))

# dput() prints the vector as R code that can be pasted back into a script.
dput(btypes)
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE", 
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
# Subsetting the *result* of dput() is too late: dput() has already printed
# the original order (first two output lines); only the auto-printed value
# that follows is reordered.
dput(btypes)[c(6,1:5,7)]
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE", 
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
## [1] "SIDEPATH"        "BIKE BOULEVARD"  "BIKE LANE"       "CONTRAFLOW"     
## [5] "SHARED BUS BIKE" "SHARROW"         "SIGNED ROUTE"
# Subset first, then dput(): the printed code now has SIDEPATH first.
dput(btypes[c(6,1:5,7)])
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE", 
## "SHARROW", "SIGNED ROUTE")
# The reordered dput() output pasted back in as the vector of factor levels.
lev = c( "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE", 
        "SHARROW", "SIGNED ROUTE")
  1. Reorder the output of the dput command so SIDEPATH is first.
    Recode type as a factor using this vector of levels.
    Print head(bike$type). Note what you see. Run table(bike$type) afterwards and note the order
# Recode `type` as a factor whose levels follow `lev`, the reordered vector of
# levels (SIDEPATH first). Passing `lev` directly, rather than calling dput()
# inside factor(), avoids printing the levels as a side effect.
# Base R version:
bike$type <- factor(bike$type, levels = lev)
# Equivalent dplyr version. `type` is already a factor with these levels at
# this point, so this call leaves it unchanged; it is shown as an alternative.
bike <- bike %>%
  mutate(type = factor(type, levels = lev))
# Counts are reported in level order, so SIDEPATH comes first.
table(bike$type)
## 
##        SIDEPATH  BIKE BOULEVARD       BIKE LANE      CONTRAFLOW 
##               7              49             621              13 
## SHARED BUS BIKE         SHARROW    SIGNED ROUTE 
##              39             589             304
  1. Make a column type2, which is a factor of type, with the levels: c( "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE"). Run table(bike$type2), with the option useNA = "always". Note, we do not have to make type a character again before doing this
# type2 keeps only three of the levels; rows with any other type become NA.
# factor() accepts the existing factor directly.
bike <- bike %>%
  mutate(
    type2 = factor(type, levels = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE"))
  )
table(bike$type)
## 
##        SIDEPATH  BIKE BOULEVARD       BIKE LANE      CONTRAFLOW 
##               7              49             621              13 
## SHARED BUS BIKE         SHARROW    SIGNED ROUTE 
##              39             589             304
# By default table() leaves missing values out, so only the three levels show.
table(bike$type2)
## 
##       SIDEPATH BIKE BOULEVARD      BIKE LANE 
##              7             49            621
# useNA = "always" adds an <NA> column counting the rows outside those levels.
table(bike$type2, useNA = "always")
## 
##       SIDEPATH BIKE BOULEVARD      BIKE LANE           <NA> 
##              7             49            621            954
  1. Reassign dateInstalled into a character using as.character. Run head(bike$dateInstalled).
# Store the install year as text instead of a number.
bike <- bike %>%
  mutate(dateInstalled = as.character(dateInstalled))
head(bike$dateInstalled)
## [1] "0"    "2010" "2010" "0"    "2011" "2007"
  1. Reassign dateInstalled as a factor, using the default levels. Run head(bike$dateInstalled).
# Convert the text years to a factor; the default levels are the sorted
# unique values.
bike <- bike %>%
  mutate(dateInstalled = factor(dateInstalled))
head(bike$dateInstalled)
## [1] 0    2010 2010 0    2011 2007
## Levels: 0 2006 2007 2008 2009 2010 2011 2012 2013
# Restrict the levels to the years 2005-2017. Values that match none of these
# levels (here the "0" entries) become NA and are dropped from the counts.
table(factor(bike$dateInstalled, levels = 2005:2017))
## 
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 
##    0    2  368  206   86  625  101  107   10    0    0    0    0
# useNA = "ifany" adds an <NA> column when missing values are present.
table(factor(bike$dateInstalled, levels = 2005:2017), 
        useNA="ifany")
## 
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 <NA> 
##    0    2  368  206   86  625  101  107   10    0    0    0    0  126
  1. Do not reassign dateInstalled, but simply run head(as.numeric(bike$dateInstalled)). We are looking to see what happens when we try to go from factor to numeric.
# as.numeric() on a factor returns the integer level codes (each value's
# position among the levels), not the year labels.
head(as.numeric(bike$dateInstalled)) 
## [1] 1 6 6 1 7 3
  1. Do not reassign dateInstalled, but simply run head(as.numeric(as.character(bike$dateInstalled))). This is how you get “numeric” values back if they were “incorrectly” stored as factors.
# Go through character first so the labels, not the level codes, are converted.
head(as.numeric(as.character(bike$dateInstalled)))
## [1]    0 2010 2010    0 2011 2007

Part 2

  1. Convert type back to a character. Make a column type2 (replacing the old one) where if the type is one of these categories: c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE") call it "OTHER". Use %in% and ifelse. Make type2 a factor with the levels c( "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER")
# Turn `type` back into text, collapse the four listed types into "OTHER",
# then store the result as a factor with an explicit level order.
bike <- bike %>%
  mutate(
    type = as.character(type),
    type2 = ifelse(
      type %in% c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"),
      "OTHER",
      type
    ),
    type2 = factor(
      type2,
      levels = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER")
    )
  )

table(bike$type2)
## 
##       SIDEPATH BIKE BOULEVARD      BIKE LANE          OTHER 
##              7             49            621            945
# Alternative that works on the factor directly. fct_collapse() (forcats,
# attached with the tidyverse) merges the four listed levels into "OTHER" and
# keeps the remaining levels in their existing order, so "OTHER" ends up last,
# as the exercise requires. recode_factor() would instead order the levels by
# the replacements and move "OTHER" to the front.
bike2 <- bike %>%
  mutate(
    type = factor(
      type,
      levels = c(
        "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
        "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"
      )
    ),
    type2 = fct_collapse(
      type,
      OTHER = c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE")
    )
  )
table(bike2$type2)
## 
##       SIDEPATH BIKE BOULEVARD      BIKE LANE          OTHER 
##              7             49            621            945

Part 3

  1. Parse the following dates with the correct lubridate function:
  1. “2014/02-14”
# ymd() reads year, month, day and copes with the mixed "/" and "-" separators.
ymd("2014/02-14")
## [1] "2014-02-14"
  1. “04/22/14 03:20”
# mdy_hm() reads month/day/year followed by hours and minutes; the two-digit
# year is read as 2014 and the result is reported in UTC.
mdy_hm("04/22/14 03:20")
## [1] "2014-04-22 03:20:00 UTC"
  1. “4/5/2016 03:2:22” assume mdy
# mdy_hms() also reads seconds; the unpadded minute "2" is parsed as 02.
mdy_hms("4/5/2016 03:2:22")
## [1] "2016-04-05 03:02:22 UTC"