8.5 Exercise

  1. Split the ames dataset into training and testing sets
  2. Fit a linear model
  3. Measure how well the model performs on the testing set
  • Hint: set a seed (e.g., 42) so that the random split is reproducible
  • Hint: library(tidyverse), library(tidymodels)

8.5.1 Solution

library(tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
## x readr::spec()   masks yardstick::spec()
library(tidymodels)
## ── Attaching packages ───────────────────────────────────────────────────────────── tidymodels 0.0.3 ──
## ✓ broom   0.5.4     ✓ recipes 0.1.9
## ✓ dials   0.0.4     ✓ rsample 0.0.5
## ✓ infer   0.5.1
## ── Conflicts ──────────────────────────────────────────────────────────────── tidymodels_conflicts() ──
## x scales::discard()   masks purrr::discard()
## x dplyr::filter()     masks stats::filter()
## x recipes::fixed()    masks stringr::fixed()
## x dplyr::lag()        masks stats::lag()
## x dials::margin()     masks ggplot2::margin()
## x readr::spec()       masks yardstick::spec()
## x recipes::step()     masks stats::step()
## x recipes::yj_trans() masks scales::yj_trans()
# Fix the RNG state so the random train/test partition below is reproducible.
set.seed(42)

# Partition ames using rsample's default split settings, then extract the
# training and testing portions from the resulting split object.
ames_split <- rsample::initial_split(ames)
ames_train <- rsample::training(ames_split)
ames_test <- rsample::testing(ames_split)

# Specify a linear regression that uses the "lm" engine and fit
# Sale_Price as a function of Gr_Liv_Area on the training rows only.
lm_fit <- parsnip::fit(
  parsnip::set_engine(parsnip::linear_reg(), engine = "lm"),
  Sale_Price ~ Gr_Liv_Area,
  data = ames_train
)

# Predict on the held-out rows and store the observed sale price next to
# the `.pred` column so both are available in one table.
price_pred <- dplyr::mutate(
  stats::predict(lm_fit, new_data = ames_test),
  price_truth = ames_test[["Sale_Price"]]
)

# Compare predictions against the observed prices with the rmse metric.
yardstick::rmse(
  price_pred,
  truth = price_truth,
  estimate = .pred
)
## # A tibble: 1 x 3
##   .metric .estimator .estimate
##   <chr>   <chr>          <dbl>
## 1 rmse    standard      56478.