# LOGISTIC REGRESSION PROJECT
##########################################################################
# ##############################################################################
# Part 1 Riding Mowers Reprise
# A company that manufactures riding mowers wants to identify the best sales
# prospects for an intensive sales campaign.
# In particular, the company is interested in classifying households as
# prospective owners or non-owners on the basis of Income (in $1,000s) and
# Lot_Size (in 1,000s of sqft).
# Use all the data to fit a logistic regression of ownership on the two predictors
##########################################################################
setwd("C:/Users/arodriguez/Dropbox/classes/DataMining/LogisticRegression/logistic_project")
options(digits = 3, scipen = 9999)
remove(list = ls())
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0 v purrr 0.2.5
## v tibble 2.0.1 v dplyr 0.7.8
## v tidyr 0.8.2 v stringr 1.3.1
## v readr 1.3.1 v forcats 0.3.0
## -- Conflicts ----------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(mosaic)
## Loading required package: lattice
## Loading required package: ggformula
## Loading required package: ggstance
##
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
##
## geom_errorbarh, GeomErrorbarh
##
## New to ggformula? Try the tutorials:
## learnr::run_tutorial("introduction", package = "ggformula")
## learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## The 'mosaic' package masks several functions from core packages in order to add
## additional features. The original behavior of these functions should not be affected by this.
##
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
##
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
##
## mean
## The following objects are masked from 'package:dplyr':
##
## count, do, tally
## The following object is masked from 'package:purrr':
##
## cross
## The following object is masked from 'package:ggplot2':
##
## stat
## The following objects are masked from 'package:stats':
##
## binom.test, cor, cor.test, cov, fivenum, IQR, median,
## prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
##
## max, mean, min, prod, range, sample, sum
library(caret)
##
## Attaching package: 'caret'
## The following object is masked from 'package:mosaic':
##
## dotPlot
## The following object is masked from 'package:purrr':
##
## lift
library(sjPlot)
# ##############################################################################
# For the Riding Mowers Data Set (Revised) show the following and answer the questions.
# 1. What percentage of households in the study were owners of a riding mower?
# 2. Create a scatter plot of Income vs. Lot Size using color or symbol to
# distinguish owner from non-owner
# 3. Among non-owners, what is the percentage of households classified correctly?
# 4. To increase the percentage of correctly classified non-owners, should the
# cutoff probability be increased or decreased?
# 5. What is the probability that a household with a $60K income and a lot size
# of 20,000 sqft is an owner? What are the (log) odds?
# 6. What is the classification of a household with a $60K income and a
# lot size of 21,000 sqft? Use a cutoff of 0.5, and then answer the question
# using a cutoff of 0.75.
# 7. What is the minimum income that a household with a 16,000 sqft lot size
# should have before it is classified as an owner? Also: show a graph
# displaying the predicted probabilities of Ownership against Income
################################################################################
# Read the dataset
dir()
## [1] "LogisticRegressionProject.R"
## [2] "LogisticRegressionProject_AROD.knit.md"
## [3] "LogisticRegressionProject_AROD.R"
## [4] "LogisticRegressionProject_AROD.spin.R"
## [5] "LogisticRegressionProject_AROD.spin.Rmd"
## [6] "RidingMowers.csv"
## [7] "RidingMowers_REV.csv"
## [8] "RidingMowersLargeSet.csv"
## [9] "SystemAdministrators.xls"
riding = read.csv("RidingMowers_REV.csv", header = T)
head(riding)
## X Income Lot_Size Ownership
## 1 4.0 61.5 20.8 owner
## 2 23.0 51.0 14.0 non-owner
## 3 4.1 61.5 20.8 owner
## 4 24.0 63.0 14.8 non-owner
## 5 6.0 110.1 19.2 owner
## 6 5.0 87.0 23.6 owner
str(riding)
## 'data.frame': 1000 obs. of 4 variables:
## $ X : num 4 23 4.1 24 6 5 2 16 1 21 ...
## $ Income : num 61.5 51 61.5 63 110.1 ...
## $ Lot_Size : num 20.8 14 20.8 14.8 19.2 23.6 16.8 20.4 18.4 16.4 ...
## $ Ownership: Factor w/ 2 levels "non-owner","owner": 2 1 2 1 2 2 2 1 2 1 ...
summary(riding)
## X Income Lot_Size Ownership
## Min. : 1.00 Min. : 33.0 Min. :14.0 non-owner:511
## 1st Qu.: 7.27 1st Qu.: 51.0 1st Qu.:17.6 owner :489
## Median :13.19 Median : 64.8 Median :18.8
## Mean :12.88 Mean : 68.4 Mean :18.9
## 3rd Qu.:18.46 3rd Qu.: 82.8 3rd Qu.:20.8
## Max. :24.90 Max. :110.1 Max. :23.6
names(riding)
## [1] "X" "Income" "Lot_Size" "Ownership"
# 1. What percentage of households in the study were owners of a riding mower?
tally(~ Ownership, data = riding)
## Ownership
## non-owner owner
## 511 489
prop.table(table(riding$Ownership))
##
## non-owner owner
## 0.511 0.489
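# Answer: about 48.9% of the households in the sample own a riding mower.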
# 2. Create a scatter plot of Income vs. Lot Size using color or symbol to
# distinguish owner from non-owner
ggplot(riding, aes(x = Lot_Size, y = Income, col = Ownership)) +
  geom_point(size = 4)

# 3. Among non-owners, what is the percentage of households classified correctly?
# Logistic regression with the full data set
riding_log = glm(Ownership ~ Income + Lot_Size,
                 data = riding, family = "binomial")
summary(riding_log)
##
## Call:
## glm(formula = Ownership ~ Income + Lot_Size, family = "binomial",
## data = riding)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.658 -0.465 -0.054 0.499 1.857
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -24.66138 1.66343 -14.8 <0.0000000000000002 ***
## Income 0.09941 0.00739 13.4 <0.0000000000000002 ***
## Lot_Size 0.93306 0.06901 13.5 <0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1385.81 on 999 degrees of freedom
## Residual deviance: 697.25 on 997 degrees of freedom
## AIC: 703.2
##
## Number of Fisher Scoring iterations: 6
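# An odds-ratio view of the fitted coefficients (a quick supplement): each
# extra $1,000 of income multiplies the odds of ownership by exp(0.0994),
# about 1.10, and each extra 1,000 sqft of lot size by exp(0.933), about 2.54.
exp(coef(riding_log))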
plot_model(riding_log)

plot_model(riding_log, type = "pred")
## $Income

##
## $Lot_Size

riding_pred = predict(riding_log, newdata = riding, type = "response")
head(riding_pred)  # preview; printing all 1,000 fitted probabilities is unwieldy
## 1 2 3 4 5 6
## 0.70266 0.00146 0.70266 0.01006 0.98520 0.99755
riding_pred = ifelse(riding_pred > 0.5, "owner", "non-owner")
riding_pred = as.factor(riding_pred)
confusionMatrix(riding_pred, riding$Ownership)
## Confusion Matrix and Statistics
##
## Reference
## Prediction non-owner owner
## non-owner 407 85
## owner 104 404
##
## Accuracy : 0.811
## 95% CI : (0.785, 0.835)
## No Information Rate : 0.511
## P-Value [Acc > NIR] : <0.0000000000000002
##
## Kappa : 0.622
## Mcnemar's Test P-Value : 0.19
##
## Sensitivity : 0.796
## Specificity : 0.826
## Pos Pred Value : 0.827
## Neg Pred Value : 0.795
## Prevalence : 0.511
## Detection Rate : 0.407
## Detection Prevalence : 0.492
## Balanced Accuracy : 0.811
##
## 'Positive' Class : non-owner
##
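# Answer: with "non-owner" as the positive class, 407 of the 511 non-owners
# are classified correctly: 407 / (407 + 104), about 79.6% (the reported
# sensitivity of 0.796).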
# 4. To increase the percentage of correctly classified non-owners, should the
# cutoff probability be increased or decreased?
riding_pred = predict(riding_log, newdata = riding, type = "response")
riding_pred = ifelse(riding_pred > 0.65, "owner", "non-owner")
riding_pred = as.factor(riding_pred)
confusionMatrix(riding_pred, riding$Ownership)
## Confusion Matrix and Statistics
##
## Reference
## Prediction non-owner owner
## non-owner 458 85
## owner 53 404
##
## Accuracy : 0.862
## 95% CI : (0.839, 0.883)
## No Information Rate : 0.511
## P-Value [Acc > NIR] : < 0.0000000000000002
##
## Kappa : 0.723
## Mcnemar's Test P-Value : 0.00832
##
## Sensitivity : 0.896
## Specificity : 0.826
## Pos Pred Value : 0.843
## Neg Pred Value : 0.884
## Prevalence : 0.511
## Detection Rate : 0.458
## Detection Prevalence : 0.543
## Balanced Accuracy : 0.861
##
## 'Positive' Class : non-owner
##
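# Answer: the cutoff should be increased. Raising it from 0.5 to 0.65 lifts
# the correctly classified non-owners from 407 to 458 (sensitivity 0.796 to
# 0.896), with no change here in the classification of actual owners.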
# 5. What is the probability that a household with a $60K income and a lot size
# of 20,000 sqft is an owner? What are the (log) odds?
newcustomers = data.frame(Income = 60, Lot_Size = 20)
riding_pred = predict(riding_log, newdata = newcustomers, type = "response")
riding_pred
## 1
## 0.491
riding_pred = predict(riding_log, newdata = newcustomers, type = "link")
riding_pred
## 1
## -0.0356
# To convert log odds to a probability:
probs <- 1 / (1 + exp(-riding_pred))
probs
## 1
## 0.491
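# A quick arithmetic check that the log odds and probability agree:
exp(-0.0356)        # odds of ownership, about 0.965
0.965 / (1 + 0.965) # back to the probability, about 0.491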
# 6. What is the classification of a household with a $60K income and a
# lot size of 21,000 sqft? Use a cutoff of 0.5, and then answer the question
# using a cutoff of 0.75.
newcustomers = data.frame(Income = 60, Lot_Size = 21)
riding_pred = predict(riding_log, newdata = newcustomers, type = "response")
riding_pred
## 1
## 0.71
riding_class = ifelse(riding_pred > 0.5, "owner", "non-owner")
riding_class
## 1
## "owner"
# Note: apply the 0.75 cutoff to the predicted probability, not to the
# character label produced above.
riding_class = ifelse(riding_pred > 0.75, "owner", "non-owner")
riding_class
## 1
## "non-owner"
# At the 0.5 cutoff the household is classified as an owner (0.71 > 0.5); at
# the 0.75 cutoff it is a non-owner (0.71 < 0.75).
# 7. What is the minimum income that a household with a 16,000 sqft lot size
# should have before it is classified as an owner?
riding_pred = predict(riding_log, newdata = riding, type = "response")
riding$riding_pred = riding_pred
riding %>% filter(Ownership == "owner") %>% arrange(Income, Lot_Size) %>%
  head()  # preview; the full list of 489 owner rows is not needed here
## X Income Lot_Size Ownership riding_pred
## 1 11.00 51.0 22.0 owner 0.718
## 2 11.10 51.0 22.0 owner 0.718
## 3 11.20 51.0 22.0 owner 0.718
## 4 11.30 51.0 22.0 owner 0.718
## 5 11.40 51.0 22.0 owner 0.718
## 6 11.50 51.0 22.0 owner 0.718
min_income = riding %>% filter(Ownership == "owner" & Lot_Size >= 16) %>%
  arrange(Income, Lot_Size)
min(min_income$Income)
## [1] 51
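# The 51 above is the lowest income among actual owners with lots of at least
# 16,000 sqft. A model-based alternative (a sketch, assuming the 0.5 cutoff):
# solve the decision boundary b0 + b1*Income + b2*Lot_Size = 0 for Income at
# Lot_Size = 16.
b = coef(riding_log)
-(b["(Intercept)"] + b["Lot_Size"] * 16) / b["Income"]
# This gives roughly 98, i.e. an income of about $98K before the model itself
# classifies a household with a 16,000 sqft lot as an owner.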
# Also: show a graph displaying the predicted probabilities of Ownership
# against Income (here at Lot_Size = 16)
plot_model(riding_log, type = "pred", terms = c("Income", "Lot_Size[16]"))

#==========================================================================#
# Part II
# Identifying Good System Administrators
# A management consultant is studying the roles played by experience and
# training in a system administrator's ability to complete a set of tasks
# in a specified amount of time.
# In particular, she is interested in discriminating between administrators
# who are able to complete given tasks within a specified amount of time
# and those who are not. Data on the performance of 75 randomly selected
# administrators are stored in the file SystemAdministrators.xls.
# Experience: months of full-time system administrator experience;
# Training: number of relevant training credits;
# Completed: Yes or No depending on whether the administrator completed the
# tasks.
##################################################################
remove(list = ls())
dir()
## [1] "LogisticRegressionProject.R"
## [2] "LogisticRegressionProject_AROD.knit.md"
## [3] "LogisticRegressionProject_AROD.R"
## [4] "LogisticRegressionProject_AROD.spin.R"
## [5] "LogisticRegressionProject_AROD.spin.Rmd"
## [6] "RidingMowers.csv"
## [7] "RidingMowers_REV.csv"
## [8] "RidingMowersLargeSet.csv"
## [9] "SystemAdministrators.xls"
library(readxl)
library(caret)
library(tidyverse)
# Use the file SystemAdministrators.xls and read it using
# the function read_excel()
sa = read_excel("SystemAdministrators.xls", sheet = "data")
head(sa)
## # A tibble: 6 x 3
## Experience Training Completed
## <dbl> <dbl> <chr>
## 1 10.9 4 Yes
## 2 9.9 4 Yes
## 3 10.4 6 Yes
## 4 13.7 6 Yes
## 5 9.4 8 Yes
## 6 12.4 4 Yes
str(sa)
## Classes 'tbl_df', 'tbl' and 'data.frame': 75 obs. of 3 variables:
## $ Experience: num 10.9 9.9 10.4 13.7 9.4 12.4 7.9 8.9 10.2 11.4 ...
## $ Training : num 4 4 6 6 8 4 6 4 6 4 ...
## $ Completed : chr "Yes" "Yes" "Yes" "Yes" ...
# Exploratory Data Analysis
# Tally the outcome variable
tally(~ Completed, data = sa)
## Completed
## No Yes
## 60 15
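# Only 15 of the 75 administrators (20%) completed the tasks, so the classes
# are imbalanced.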
# Show a boxplot of Training and the Outcome variable
ggplot(sa, aes(x = Completed, y = Training)) +
  geom_boxplot()

# Show a boxplot of Experience and the Outcome variable
ggplot(sa, aes(x = Completed, y = Experience)) +
  geom_boxplot()

# Create a scatter plot of Experience vs. Training using colors
# or symbols to distinguish administrators who completed the tasks
# and those who didn't. Which predictor(s) appear(s) potentially
# useful for classifying task completion?
ggplot(sa, aes(y = Experience, x = Training)) +
  geom_point(aes(col = Completed))
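# Experience appears to separate the two groups much more clearly than
# Training, which the model below confirms.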

# Run a logistic regression model with both predictors using the
# entire dataset as training data. Note: recall that the outcome
# variable should be a factor variable.
sa$Completed = as.factor(sa$Completed)
sa_logit = glm(Completed ~ ., data = sa, family = "binomial")
summary(sa_logit)
##
## Call:
## glm(formula = Completed ~ ., family = "binomial", data = sa)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.653 -0.350 -0.175 -0.082 2.218
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -10.981 2.892 -3.80 0.00015 ***
## Experience 1.127 0.291 3.87 0.00011 ***
## Training 0.181 0.339 0.53 0.59397
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 75.060 on 74 degrees of freedom
## Residual deviance: 35.713 on 72 degrees of freedom
## AIC: 41.71
##
## Number of Fisher Scoring iterations: 6
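# Experience is the significant predictor (p < 0.001): each additional month
# multiplies the odds of completion by exp(1.127), about 3.1. Training is not
# significant here (p = 0.59).
exp(coef(sa_logit))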
# Among those who completed the task, what is the percentage of
# programmers incorrectly classified as failing to complete the task?
sa_pred = predict(sa_logit, newdata = sa, type = "response")
sa_pred
## 1 2 3 4 5 6 7 8
## 0.883323 0.710404 0.860786 0.996092 0.741910 0.976216 0.269828 0.442855
## 9 10 11 12 13 14 15 16
## 0.831521 0.930069 0.361776 0.527097 0.974614 0.155173 0.085430 0.008686
## 17 18 19 20 21 22 23 24
## 0.130444 0.009712 0.007768 0.004963 0.003965 0.040715 0.006945 0.012138
## 25 26 27 28 29 30 31 32
## 0.442855 0.024124 0.015160 0.021129 0.170527 0.000734 0.062454 0.051599
## 33 34 35 36 37 38 39 40
## 0.078674 0.002021 0.009712 0.015160 0.158204 0.155173 0.448522 0.970381
## 41 42 43 44 45 46 47 48
## 0.023590 0.032768 0.024124 0.003544 0.011362 0.187063 0.085430 0.004436
## 49 50 51 52 53 54 55 56
## 0.032768 0.008686 0.077027 0.012138 0.069387 0.023590 0.006210 0.104765
## 57 58 59 60 61 62 63 64
## 0.012138 0.010858 0.527097 0.006945 0.037350 0.223772 0.003168 0.040715
## 65 66 67 68 69 70 71 72
## 0.056172 0.050488 0.231842 0.050488 0.265332 0.026330 0.018920 0.052733
## 73 74 75
## 0.063811 0.002530 0.013566
sa_pred = ifelse(sa_pred >= 0.5, "Yes", "No")
sa_pred = as.factor(sa_pred)
confusionMatrix(sa_pred, sa$Completed)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 58 5
## Yes 2 10
##
## Accuracy : 0.907
## 95% CI : (0.817, 0.962)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.0104
##
## Kappa : 0.685
## Mcnemar's Test P-Value : 0.4497
##
## Sensitivity : 0.967
## Specificity : 0.667
## Pos Pred Value : 0.921
## Neg Pred Value : 0.833
## Prevalence : 0.800
## Detection Rate : 0.773
## Detection Prevalence : 0.840
## Balanced Accuracy : 0.817
##
## 'Positive' Class : No
##
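# Answer: of the 15 administrators who completed the tasks, 5 are classified
# as failing to complete them: 5 / (5 + 10), about 33%.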
# To decrease the percentage in the answer above, should
# the cutoff probability be increased or decreased?
sa_pred = predict(sa_logit, newdata = sa, type = "response")
sa_pred = ifelse(sa_pred >= 0.35, "Yes", "No")
sa_pred = as.factor(sa_pred)
confusionMatrix(sa_pred, sa$Completed)
## Confusion Matrix and Statistics
##
## Reference
## Prediction No Yes
## No 56 3
## Yes 4 12
##
## Accuracy : 0.907
## 95% CI : (0.817, 0.962)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.0104
##
## Kappa : 0.715
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.933
## Specificity : 0.800
## Pos Pred Value : 0.949
## Neg Pred Value : 0.750
## Prevalence : 0.800
## Detection Rate : 0.747
## Detection Prevalence : 0.787
## Balanced Accuracy : 0.867
##
## 'Positive' Class : No
##
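# Answer: the cutoff should be decreased. Dropping it from 0.5 to 0.35 cuts
# the misclassified completers from 5 of 15 to 3 of 15 (20%), while the
# misclassified non-completers rise from 2 to 4.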
# How much experience must be accumulated by a programmer with 4 training
# credits (Training = 4) before his or her estimated probability of
# completing the task exceeds 0.5?
sa_pred = predict(sa_logit, newdata = sa, type = "response")
sa$sa_pred = sa_pred
sa %>% filter(sa_pred >= 0.5 & Training == 4) %>% arrange(Experience)
## # A tibble: 7 x 4
## Experience Training Completed sa_pred
## <dbl> <dbl> <fct> <dbl>
## 1 9.2 4 Yes 0.527
## 2 9.2 4 No 0.527
## 3 9.9 4 Yes 0.710
## 4 10.9 4 Yes 0.883
## 5 11.4 4 Yes 0.930
## 6 12.2 4 No 0.970
## 7 12.4 4 Yes 0.976
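# From the table, the smallest qualifying Experience in the data is 9.2
# months. A model-based check (a sketch, assuming the 0.5 cutoff): solve the
# decision boundary at Training = 4.
b = coef(sa_logit)
-(b["(Intercept)"] + b["Training"] * 4) / b["Experience"]
# roughly 9.1 months of experience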
# A graph of the predicted probability of task completion against Experience
# (at Training = 4) supplements the answer.
plot_model(sa_logit, type = "pred", terms = c("Experience", "Training[4]"))

#=================================================================#