# Bike Lanes Dataset: BikeBaltimore is the Department of Transportation’s bike program. The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
# You can download it as a CSV into your current working directory. Note it's also available at: http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv
library(readr)
library(tidyverse)
library(dplyr)
library(lubridate)
library(jhur)
# Read the bike lanes CSV straight from its URL into `bike`.
bike <- read_csv("http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv")
## Parsed with column specification:
## cols(
## subType = col_character(),
## name = col_character(),
## block = col_character(),
## type = col_character(),
## numLanes = col_double(),
## project = col_character(),
## route = col_character(),
## length = col_double(),
## dateInstalled = col_double()
## )
# Or use the read_bike() helper from the jhur package:
library(jhur)
# Load the bike lanes data through the jhur helper instead of read_csv().
bike <- read_bike()
## Parsed with column specification:
## cols(
## subType = col_character(),
## name = col_character(),
## block = col_character(),
## type = col_character(),
## numLanes = col_double(),
## project = col_character(),
## route = col_character(),
## length = col_double(),
## dateInstalled = col_double()
## )
# Get the distinct values of the `type` column. Use sort(unique()). Assign
# this to an object `btypes`. Type dput(btypes).

# Peek at the column as a factor; by default the levels come out sorted.
head(factor(bike$type))
## [1] BIKE BOULEVARD SIDEPATH SIGNED ROUTE SIDEPATH
## [5] BIKE LANE SIGNED ROUTE
## 7 Levels: BIKE BOULEVARD BIKE LANE CONTRAFLOW ... SIGNED ROUTE
btypes <- sort(unique(bike$type))
# Hand-typed version of the desired order, with SIDEPATH first.
x <- c(
  "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
  "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"
)
# dput() prints R code that recreates the object, ready to copy and paste.
dput(btypes)
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
# Subsetting the *result* of dput() still prints the full, unreordered vector
# first; only the returned value is reordered.
dput(btypes)[c(6, 1:5, 7)]
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
## [1] "SIDEPATH" "BIKE BOULEVARD" "BIKE LANE"
## [4] "CONTRAFLOW" "SHARED BUS BIKE" "SHARROW"
## [7] "SIGNED ROUTE"
# Subsetting *inside* dput() prints the reordered vector itself.
dput(btypes[c(6, 1:5, 7)])
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
# The dput() output above, pasted back in as an explicit level order.
lev <- c(
  "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
  "SHARROW", "SIGNED ROUTE"
)
# Recode `type` as a factor that has the SIDEPATH level first. Print
# head(bike$type). Note what you see. Run table(bike$type) afterwards and
# note the order.

# Approach 1: make it a factor, then move SIDEPATH to the front.
bike$type <- factor(bike$type)
bike$type <- relevel(bike$type, "SIDEPATH")

# Approach 2: supply the full level order. dput() echoes the vector (shown
# below) and its returned value is what gets used as `levels`.
bike$type <- factor(
  bike$type,
  levels = dput(btypes[c(6, 1:5, 7)])
)
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")

# Approach 3: the same recoding written as a dplyr pipeline.
bike <- bike %>%
  mutate(type = factor(type, levels = dput(btypes[c(6, 1:5, 7)])))
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")

# table() reports counts in level order, so SIDEPATH now comes first.
table(bike$type)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW
## 7 49 621 13
## SHARED BUS BIKE SHARROW SIGNED ROUTE
## 39 589 304
# Make a column `type2`, which is a factor of `type`, with the levels:
# c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE"). Run table(bike$type2), with
# the option useNA = "always". Note, we do not have to make `type` a
# character again before doing this.
bike <- bike %>%
  mutate(
    type2 = factor(
      type,
      levels = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE")
    )
  )
table(bike$type)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW
## 7 49 621 13
## SHARED BUS BIKE SHARROW SIGNED ROUTE
## 39 589 304
# Values of `type` that are not among the requested levels become NA in
# `type2`; table() hides them unless useNA is set.
table(bike$type2)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE
## 7 49 621
table(bike$type2, useNA = "always")
##
## SIDEPATH BIKE BOULEVARD BIKE LANE <NA>
## 7 49 621 954
# Convert `dateInstalled` into a character using as.character. Run
# head(bike$dateInstalled).
bike <- bike %>%
  mutate(dateInstalled = as.character(dateInstalled))
head(bike$dateInstalled)
## [1] "0" "2010" "2010" "0" "2011" "2007"
# Make `dateInstalled` a factor, using the default levels. Run
# head(bike$dateInstalled).
bike <- bike %>%
  mutate(dateInstalled = factor(dateInstalled))
head(bike$dateInstalled)
## [1] 0 2010 2010 0 2011 2007
## Levels: 0 2006 2007 2008 2009 2010 2011 2012 2013
# Re-factoring with levels 2005:2017 keeps years with zero counts and turns
# any value outside that range (here the "0" entries) into NA.
table(factor(bike$dateInstalled, levels = 2005:2017))
##
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
## 0 2 368 206 86 625 101 107 10 0 0 0 0
# useNA = "ifany" adds the NA count to the table.
table(
  factor(bike$dateInstalled, levels = 2005:2017),
  useNA = "ifany"
)
##
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 <NA>
## 0 2 368 206 86 625 101 107 10 0 0 0 0 126
# Do not reassign `dateInstalled`, but simply run
# head(as.numeric(bike$dateInstalled)); we are looking to see what happens
# when we try to go from factor to numeric.
# The result is the position of each value among the factor levels, not the
# year itself.
head(as.numeric(bike$dateInstalled))
## [1] 1 6 6 1 7 3
# Do not reassign `dateInstalled`, but simply run
# head(as.numeric(as.character(bike$dateInstalled))); this is how you get
# “numeric” values back if they were “incorrectly” factors.
# Going through as.character() first recovers the labels, which are then
# parsed as numbers.
head(as.numeric(as.character(bike$dateInstalled)))
## [1] 0 2010 2010 0 2011 2007
# Convert `type` back to a character. Make a column `type2` (replacing the
# old one) where if the type is one of these categories:
# c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE") call it
# "OTHER". Use %in% and ifelse. Make `type2` a factor with the levels
# c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER").
bike <- bike %>%
  mutate(
    type = as.character(type),
    # Collapse the four listed categories into "OTHER"; keep the rest as is.
    type2 = ifelse(
      type %in% c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"),
      "OTHER",
      type
    ),
    type2 = factor(
      type2,
      levels = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER")
    )
  )
table(bike$type2)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE OTHER
## 7 49 621 945

# Alternative with recode_factor(), stored in a separate object `bike2`.
# Note in the table below that the recoded level OTHER is placed first,
# ahead of the levels that were left untouched.
bike2 <- bike %>%
  mutate(
    type = factor(
      type,
      levels = c(
        "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
        "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"
      )
    ),
    type2 = recode_factor(
      type,
      "CONTRAFLOW" = "OTHER",
      "SHARED BUS BIKE" = "OTHER",
      "SHARROW" = "OTHER",
      "SIGNED ROUTE" = "OTHER"
    )
  )
table(bike2$type2)
##
## OTHER SIDEPATH BIKE BOULEVARD BIKE LANE
## 945 7 49 621
# Parse the following dates using the matching lubridate function:

# Year-month-day; the mixed "/" and "-" separators are still parsed.
ymd("2014/02-14")
## [1] "2014-02-14"
# Month-day-year with hours and minutes.
mdy_hm("04/22/14 03:20")
## [1] "2014-04-22 03:20:00 UTC"
# Month-day-year with hours, minutes and seconds.
mdy_hms("4/5/2016 03:2:22")
## [1] "2016-04-05 03:02:22 UTC"