Bike Lanes Dataset: BikeBaltimore is the Department of Transportation’s bike program. The data is from http://data.baltimorecity.gov/Transportation/Bike-Lanes/xzfj-gyms
You can download it as a CSV into your current working directory. Note it's also available at: http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv
library(readr)
library(tidyverse)
library(dplyr)
library(lubridate)
library(jhur)
# Read the Bike Lanes CSV directly from its URL.
bike <- read_csv(
  file = "http://johnmuschelli.com/intro_to_r/data/Bike_Lanes.csv"
)
## Parsed with column specification:
## cols(
## subType = col_character(),
## name = col_character(),
## block = col_character(),
## type = col_character(),
## numLanes = col_double(),
## project = col_character(),
## route = col_character(),
## length = col_double(),
## dateInstalled = col_double()
## )
or use
# Alternative loader: the jhur package provides read_bike(), which reports the
# same column specification as the read_csv() call.
library(jhur)
bike <- read_bike()
## Parsed with column specification:
## cols(
## subType = col_character(),
## name = col_character(),
## block = col_character(),
## type = col_character(),
## numLanes = col_double(),
## project = col_character(),
## route = col_character(),
## length = col_double(),
## dateInstalled = col_double()
## )
Get the unique values of the type column. Use sort(unique()). Assign this to an object btypes. Type dput(btypes).
# With no levels given, factor() uses the sorted unique values as levels.
bike$type %>% factor() %>% head()
## [1] BIKE BOULEVARD SIDEPATH SIGNED ROUTE SIDEPATH
## [5] BIKE LANE SIGNED ROUTE
## 7 Levels: BIKE BOULEVARD BIKE LANE CONTRAFLOW ... SIGNED ROUTE
btypes <- sort(unique(bike$type))
# Hand-typed version of the desired ordering (SIDEPATH first).
x <- c(
  "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
  "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"
)
# dput() prints an R expression that recreates the object.
dput(btypes)
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
# Indexing the *result* of dput(): dput() still prints the full, unordered
# vector, and then the reordered vector is auto-printed.
dput(btypes)[c(6, 1:5, 7)]
## c("BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIDEPATH", "SIGNED ROUTE")
## [1] "SIDEPATH" "BIKE BOULEVARD" "BIKE LANE"
## [4] "CONTRAFLOW" "SHARED BUS BIKE" "SHARROW"
## [7] "SIGNED ROUTE"
# Reorder first, then dput(), to get a copy-pasteable reordered vector.
dput(btypes[c(6, 1:5, 7)])
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
lev <- c(
  "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
  "SHARROW", "SIGNED ROUTE"
)
Recode type as a factor that has the SIDEPATH level first. Print head(bike$type). Note what you see. Run table(bike$type) afterwards and note the order.
bike$type = factor(bike$type)
# Option 1: move SIDEPATH to the front of the factor's levels.
bike$type <- relevel(bike$type, ref = "SIDEPATH")
# Option 2: rebuild the factor with explicit levels; dput() echoes them.
bike$type <- factor(
  bike$type,
  levels = dput(btypes[c(6, 1:5, 7)])
)
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
# Option 3: the same re-leveling written as a dplyr mutate().
bike <- mutate(
  bike,
  type = factor(type, levels = dput(btypes[c(6, 1:5, 7)]))
)
## c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW", "SHARED BUS BIKE",
## "SHARROW", "SIGNED ROUTE")
table(bike$type)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW
## 7 49 621 13
## SHARED BUS BIKE SHARROW SIGNED ROUTE
## 39 589 304
Make a column called type2, which is a factor of type, with the levels: c( "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE"). Run table(bike$type2), with the option useNA = "always". Note that we do not have to make type a character again before doing this.
bike = bike %>%
mutate(type2 = factor(type,
levels = c( "SIDEPATH", "BIKE BOULEVARD",
"BIKE LANE") ) )
# type still carries all seven levels.
table(bike$type)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE CONTRAFLOW
## 7 49 621 13
## SHARED BUS BIKE SHARROW SIGNED ROUTE
## 39 589 304
# type2 only has the three requested levels; by default table() omits NA.
table(bike$type2)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE
## 7 49 621
# useNA = "always" adds an <NA> column, which counts the rows whose type is
# not one of the three levels.
table(bike$type2, useNA = "always")
##
## SIDEPATH BIKE BOULEVARD BIKE LANE <NA>
## 7 49 621 954
Convert dateInstalled into a character using as.character. Run head(bike$dateInstalled).
bike = bike %>%
mutate(dateInstalled =
as.character(dateInstalled)
)
# The values now print in quotes because the column is character.
head(bike$dateInstalled)
## [1] "0" "2010" "2010" "0" "2011" "2007"
Make dateInstalled a factor, using the default levels. Run head(bike$dateInstalled).
bike = bike %>%
mutate(dateInstalled =
factor(dateInstalled)
)
# Default levels are the sorted unique values, including "0".
head(bike$dateInstalled)
## [1] 0 2010 2010 0 2011 2007
## Levels: 0 2006 2007 2008 2009 2010 2011 2012 2013
# Re-factoring with levels 2005:2017 also lists years that have zero rows.
table(factor(bike$dateInstalled, levels = 2005:2017))
##
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017
## 0 2 368 206 86 625 101 107 10 0 0 0 0
# Values outside 2005:2017 (such as "0") become NA; useNA = "ifany" makes
# that count visible.
table(factor(bike$dateInstalled, levels = 2005:2017),
useNA="ifany")
##
## 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 <NA>
## 0 2 368 206 86 625 101 107 10 0 0 0 0 126
Do not reassign dateInstalled, but simply run head(as.numeric(bike$dateInstalled)). We are looking to see what happens when we try to go from factor to numeric: as.numeric() returns the integer level codes, not the original values.
head(as.numeric(bike$dateInstalled))
## [1] 1 6 6 1 7 3
Do not reassign dateInstalled, but simply run head(as.numeric(as.character(bike$dateInstalled))). This is how you get “numeric” values back if they were “incorrectly” made factors.
head(as.numeric(as.character(
bike$dateInstalled)))
## [1] 0 2010 2010 0 2011 2007
Convert type back to a character. Make a column type2 (replacing the old one) where, if the type is one of these categories: c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"), call it "OTHER". Use %in% and ifelse. Make type2 a factor with the levels c( "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER").
# Turn type back into character, collapse four lane types into "OTHER", and
# then store type2 as a factor with an explicit level order.
bike <- mutate(
  bike,
  type = as.character(type),
  type2 = ifelse(
    type %in% c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"),
    "OTHER",
    type
  ),
  type2 = factor(
    type2,
    levels = c("SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "OTHER")
  )
)
table(bike$type2)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE OTHER
## 7 49 621 945
# Alternative that works on the factor directly. fct_collapse() (forcats,
# attached by library(tidyverse)) merges the four levels into "OTHER" and keeps
# the merged level at the position of the first collapsed level, so type2 gets
# the requested order: SIDEPATH, BIKE BOULEVARD, BIKE LANE, OTHER.
# recode_factor() would instead move the recoded "OTHER" level to the front.
bike2 <- bike %>%
  mutate(
    type = factor(
      type,
      levels = c(
        "SIDEPATH", "BIKE BOULEVARD", "BIKE LANE", "CONTRAFLOW",
        "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE"
      )
    ),
    type2 = fct_collapse(
      type,
      OTHER = c("CONTRAFLOW", "SHARED BUS BIKE", "SHARROW", "SIGNED ROUTE")
    )
  )
table(bike2$type2)
##
## SIDEPATH BIKE BOULEVARD BIKE LANE OTHER
## 7 49 621 945
Parse the following dates using the correct lubridate function:
ymd("2014/02-14")
## [1] "2014-02-14"
# mdy_hm(): month/day/year followed by hours:minutes; the result prints in UTC.
mdy_hm("04/22/14 03:20")
## [1] "2014-04-22 03:20:00 UTC"
(assume mdy order)
# mdy_hms() also accepts the unpadded minute "2", which prints as 02.
mdy_hms("4/5/2016 03:2:22")
## [1] "2016-04-05 03:02:22 UTC"