#LOGISTIC REGRESSION PROJECT

##########################################################################
# ##############################################################################
# Part 1 Riding Mowers Reprise
# A company that manufactures riding mowers wants to identify the best sales 
# prospects for an intensive sales campaign.

# In particular, the company is interested in classifying households as prospective
# owners or non-owners on the basis of Income (in $1,000) and Lot-Size (in 1000 sqft).

# Use all the data to fit a logistic regression of ownership on the two predictors

##########################################################################

# NOTE(review): hard-coded absolute setwd() makes this script non-portable --
# prefer running R from the project directory (or an RStudio project) instead.
setwd("C:/Users/arodriguez/Dropbox/classes/DataMining/LogisticRegression/logistic_project")
# Print 3 significant digits and suppress scientific notation in output.
options(digits = 3, scipen = 9999)
# NOTE(review): remove(list = ls()) wipes the user's entire workspace -- avoid
# in shared scripts; restarting R gives a genuinely clean session.
remove(list = ls())

    library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.1.0     v purrr   0.2.5
## v tibble  2.0.1     v dplyr   0.7.8
## v tidyr   0.8.2     v stringr 1.3.1
## v readr   1.3.1     v forcats 0.3.0
## -- Conflicts ----------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
    library(mosaic)
## Loading required package: lattice
## Loading required package: ggformula
## Loading required package: ggstance
## 
## Attaching package: 'ggstance'
## The following objects are masked from 'package:ggplot2':
## 
##     geom_errorbarh, GeomErrorbarh
## 
## New to ggformula?  Try the tutorials: 
##  learnr::run_tutorial("introduction", package = "ggformula")
##  learnr::run_tutorial("refining", package = "ggformula")
## Loading required package: mosaicData
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
## 
##     expand
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Note: If you use the Matrix package, be sure to load it BEFORE loading mosaic.
## 
## Attaching package: 'mosaic'
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median,
##     prop.test, quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
    library(caret)
## 
## Attaching package: 'caret'
## The following object is masked from 'package:mosaic':
## 
##     dotPlot
## The following object is masked from 'package:purrr':
## 
##     lift
    library(sjPlot)

# ##############################################################################

# For the Riding Mowers Data Set (Revised) show the following and answer the questions.

# 1. What percentage of households in the study were owners of a riding mower?

# 2. Create a scatter plot of Income vs. Lot Size using color or symbol to
#    distinguish owner from non-owner

# 3. Among non-owners - what is the percentage of households classified correctly?

# 4. To increase the percentage of correctly classified nonowners, should the 
#    cutoff probability be increased or decreased?

# 5. What is the probability that a household with a $60K income and a lot size of
#20,000 sqft is an owner?  what are the (log) odds?

# 6. What is the classification of a household with a $60K income and a 
# lot size of 21,000 sqft?  Use a cutoff of 0.5. And then answer the question
# using a cutoff of 0.75.

# 7. What is the minimum income that a household with a 16000 ft sq lot size 
# should have before it is classified as an owner?  Also: show a graph
# displaying the predicted probabilities of Ownership and Income

################################################################################
# Read the dataset
dir()
## [1] "LogisticRegressionProject.R"            
## [2] "LogisticRegressionProject_AROD.knit.md" 
## [3] "LogisticRegressionProject_AROD.R"       
## [4] "LogisticRegressionProject_AROD.spin.R"  
## [5] "LogisticRegressionProject_AROD.spin.Rmd"
## [6] "RidingMowers.csv"                       
## [7] "RidingMowers_REV.csv"                   
## [8] "RidingMowersLargeSet.csv"               
## [9] "SystemAdministrators.xls"
# Read the revised riding-mower data set. Use the full literal TRUE rather
# than T, which is an ordinary (reassignable) variable in R.
riding <- read.csv("RidingMowers_REV.csv", header = TRUE)
head(riding)
##      X Income Lot_Size Ownership
## 1  4.0   61.5     20.8     owner
## 2 23.0   51.0     14.0 non-owner
## 3  4.1   61.5     20.8     owner
## 4 24.0   63.0     14.8 non-owner
## 5  6.0  110.1     19.2     owner
## 6  5.0   87.0     23.6     owner
# Structure: 1000 rows, 4 columns (X is a row-id-like numeric; Ownership is a
# two-level factor with "non-owner" as the first/reference level).
str(riding)
## 'data.frame':    1000 obs. of  4 variables:
##  $ X        : num  4 23 4.1 24 6 5 2 16 1 21 ...
##  $ Income   : num  61.5 51 61.5 63 110.1 ...
##  $ Lot_Size : num  20.8 14 20.8 14.8 19.2 23.6 16.8 20.4 18.4 16.4 ...
##  $ Ownership: Factor w/ 2 levels "non-owner","owner": 2 1 2 1 2 2 2 1 2 1 ...
# Five-number summaries; note the near-even class split (511 vs 489).
summary(riding)
##        X             Income         Lot_Size        Ownership  
##  Min.   : 1.00   Min.   : 33.0   Min.   :14.0   non-owner:511  
##  1st Qu.: 7.27   1st Qu.: 51.0   1st Qu.:17.6   owner    :489  
##  Median :13.19   Median : 64.8   Median :18.8                  
##  Mean   :12.88   Mean   : 68.4   Mean   :18.9                  
##  3rd Qu.:18.46   3rd Qu.: 82.8   3rd Qu.:20.8                  
##  Max.   :24.90   Max.   :110.1   Max.   :23.6
# Column names, for reference in the formulas below.
names(riding)
## [1] "X"         "Income"    "Lot_Size"  "Ownership"
# 1. What percentage of households in the study were owners of a riding mower?
        # Raw counts per Ownership level (mosaic::tally).
        tally(~ Ownership, data = riding)
## Ownership
## non-owner     owner 
##       511       489
        # Same table as proportions: 48.9% of households are owners.
        prop.table(table(riding$Ownership))
## 
## non-owner     owner 
##     0.511     0.489
        # 2. Scatter plot of Income vs. Lot_Size, with point colour marking
        #    owners versus non-owners.
        ggplot(data = riding,
               mapping = aes(x = Lot_Size, y = Income, colour = Ownership)) +
          geom_point(size = 4)

        # 3. Among non-owners - what is the percentage of households classified correctly?
              #Logistic regression with the full data set
              # Logistic regression of Ownership on both predictors, fitted to
              # the full data set (no train/test split per the assignment).
              riding_log <- glm(Ownership ~ Income + Lot_Size,
                                family = "binomial", data = riding)
              summary(riding_log)
## 
## Call:
## glm(formula = Ownership ~ Income + Lot_Size, family = "binomial", 
##     data = riding)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -1.658  -0.465  -0.054   0.499   1.857  
## 
## Coefficients:
##              Estimate Std. Error z value            Pr(>|z|)    
## (Intercept) -24.66138    1.66343   -14.8 <0.0000000000000002 ***
## Income        0.09941    0.00739    13.4 <0.0000000000000002 ***
## Lot_Size      0.93306    0.06901    13.5 <0.0000000000000002 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1385.81  on 999  degrees of freedom
## Residual deviance:  697.25  on 997  degrees of freedom
## AIC: 703.2
## 
## Number of Fisher Scoring iterations: 6
                    # Odds-ratio (coefficient) plot for the fitted model.
                    plot_model(riding_log)  

                    # Marginal predicted-probability curves, one per predictor.
                    plot_model(riding_log, type = "pred")
## $Income

## 
## $Lot_Size

                    # Fitted probability of ownership for every household.
                    riding_pred <- predict(riding_log, newdata = riding,
                                           type = "response")
                    riding_pred
##       1       2       3       4       5       6       7       8       9 
## 0.70266 0.00146 0.70266 0.01006 0.98520 0.99755 0.38074 0.20876 0.17822 
##      10      11      12      13      14      15      16      17      18 
## 0.00950 0.01006 0.70266 0.52771 0.92392 0.98520 0.98520 0.71826 0.49878 
##      19      20      21      22      23      24      25      26      27 
## 0.03394 0.74694 0.20876 0.20876 0.38074 0.70266 0.10239 0.02106 0.98520 
##      28      29      30      31      32      33      34      35      36 
## 0.98186 0.10239 0.74694 0.03394 0.98520 0.03394 0.70266 0.20876 0.52771 
##      37      38      39      40      41      42      43      44      45 
## 0.98869 0.99755 0.92392 0.88615 0.70266 0.92392 0.74694 0.20876 0.28252 
##      46      47      48      49      50      51      52      53      54 
## 0.17822 0.92392 0.52771 0.00146 0.38074 0.01006 0.70248 0.71826 0.52771 
##      55      56      57      58      59      60      61      62      63 
## 0.70266 0.74694 0.99755 0.52771 0.87375 0.98186 0.98869 0.38074 0.98186 
##      64      65      66      67      68      69      70      71      72 
## 0.02106 0.02106 0.02106 0.28252 0.00146 0.10239 0.70266 0.03394 0.99755 
##      73      74      75      76      77      78      79      80      81 
## 0.70266 0.20876 0.17822 0.49878 0.71826 0.02106 0.28252 0.17822 0.98186 
##      82      83      84      85      86      87      88      89      90 
## 0.49878 0.49878 0.03394 0.02130 0.99755 0.98520 0.00950 0.98186 0.98186 
##      91      92      93      94      95      96      97      98      99 
## 0.02106 0.52771 0.03394 0.70248 0.49878 0.98869 0.02106 0.52771 0.99755 
##     100     101     102     103     104     105     106     107     108 
## 0.87375 0.17822 0.20876 0.28252 0.52771 0.01006 0.98186 0.10239 0.71826 
##     109     110     111     112     113     114     115     116     117 
## 0.70248 0.74694 0.00950 0.49878 0.49878 0.38074 0.03394 0.74694 0.03394 
##     118     119     120     121     122     123     124     125     126 
## 0.10239 0.99755 0.98520 0.52771 0.52771 0.70248 0.00950 0.49878 0.17822 
##     127     128     129     130     131     132     133     134     135 
## 0.98520 0.00950 0.00950 0.10239 0.92392 0.92392 0.52771 0.01006 0.02106 
##     136     137     138     139     140     141     142     143     144 
## 0.03394 0.10239 0.02106 0.52771 0.17822 0.71826 0.03394 0.52771 0.02130 
##     145     146     147     148     149     150     151     152     153 
## 0.49878 0.92392 0.92392 0.38074 0.98186 0.70248 0.02130 0.28252 0.28252 
##     154     155     156     157     158     159     160     161     162 
## 0.49878 0.52771 0.88615 0.10239 0.03394 0.17822 0.52771 0.03394 0.38074 
##     163     164     165     166     167     168     169     170     171 
## 0.38074 0.10239 0.92392 0.98520 0.92392 0.98186 0.17822 0.03394 0.00146 
##     172     173     174     175     176     177     178     179     180 
## 0.52771 0.70248 0.28252 0.02130 0.00950 0.38074 0.02106 0.74694 0.88615 
##     181     182     183     184     185     186     187     188     189 
## 0.98869 0.52771 0.99755 0.70248 0.20876 0.10239 0.03394 0.74694 0.98520 
##     190     191     192     193     194     195     196     197     198 
## 0.10239 0.02106 0.92392 0.92392 0.02106 0.28252 0.87375 0.17822 0.00146 
##     199     200     201     202     203     204     205     206     207 
## 0.98869 0.00950 0.00950 0.98186 0.70266 0.92392 0.74694 0.98869 0.02106 
##     208     209     210     211     212     213     214     215     216 
## 0.02130 0.74694 0.28252 0.49878 0.98869 0.98869 0.28252 0.20876 0.03394 
##     217     218     219     220     221     222     223     224     225 
## 0.02106 0.74694 0.74694 0.02130 0.38074 0.88615 0.88615 0.87375 0.74694 
##     226     227     228     229     230     231     232     233     234 
## 0.98869 0.70266 0.10239 0.74694 0.71826 0.52771 0.20876 0.38074 0.03394 
##     235     236     237     238     239     240     241     242     243 
## 0.00146 0.00950 0.28252 0.98186 0.17822 0.02130 0.49878 0.10239 0.98186 
##     244     245     246     247     248     249     250     251     252 
## 0.92392 0.70266 0.70266 0.01006 0.87375 0.87375 0.10239 0.10239 0.70266 
##     253     254     255     256     257     258     259     260     261 
## 0.70266 0.88615 0.03394 0.98520 0.03394 0.92392 0.88615 0.28252 0.99755 
##     262     263     264     265     266     267     268     269     270 
## 0.98869 0.03394 0.17822 0.02130 0.52771 0.98520 0.71826 0.98520 0.70248 
##     271     272     273     274     275     276     277     278     279 
## 0.92392 0.02106 0.74694 0.02106 0.88615 0.20876 0.02106 0.00950 0.92392 
##     280     281     282     283     284     285     286     287     288 
## 0.52771 0.00950 0.52771 0.52771 0.49878 0.52771 0.03394 0.17822 0.28252 
##     289     290     291     292     293     294     295     296     297 
## 0.98869 0.74694 0.00950 0.74694 0.70248 0.03394 0.38074 0.20876 0.92392 
##     298     299     300     301     302     303     304     305     306 
## 0.17822 0.28252 0.98520 0.38074 0.03394 0.74694 0.87375 0.17822 0.17822 
##     307     308     309     310     311     312     313     314     315 
## 0.71826 0.98869 0.17822 0.20876 0.71826 0.74694 0.70248 0.52771 0.87375 
##     316     317     318     319     320     321     322     323     324 
## 0.28252 0.02130 0.38074 0.88615 0.00146 0.02130 0.92392 0.71826 0.52771 
##     325     326     327     328     329     330     331     332     333 
## 0.28252 0.71826 0.10239 0.92392 0.28252 0.20876 0.20876 0.02106 0.00950 
##     334     335     336     337     338     339     340     341     342 
## 0.71826 0.02130 0.74694 0.71826 0.28252 0.87375 0.00146 0.92392 0.20876 
##     343     344     345     346     347     348     349     350     351 
## 0.98869 0.28252 0.02130 0.03394 0.74694 0.49878 0.03394 0.00146 0.98186 
##     352     353     354     355     356     357     358     359     360 
## 0.92392 0.92392 0.02106 0.98520 0.98520 0.17822 0.17822 0.92392 0.17822 
##     361     362     363     364     365     366     367     368     369 
## 0.92392 0.87375 0.70248 0.98520 0.02130 0.74694 0.70248 0.10239 0.70248 
##     370     371     372     373     374     375     376     377     378 
## 0.03394 0.92392 0.98186 0.03394 0.17822 0.98186 0.70248 0.49878 0.03394 
##     379     380     381     382     383     384     385     386     387 
## 0.00950 0.88615 0.98186 0.20876 0.70248 0.71826 0.49878 0.49878 0.49878 
##     388     389     390     391     392     393     394     395     396 
## 0.98520 0.17822 0.20876 0.03394 0.02106 0.17822 0.02106 0.00950 0.00950 
##     397     398     399     400     401     402     403     404     405 
## 0.71826 0.10239 0.70266 0.28252 0.00950 0.71826 0.38074 0.49878 0.98869 
##     406     407     408     409     410     411     412     413     414 
## 0.98520 0.17822 0.10239 0.70266 0.38074 0.52771 0.03394 0.28252 0.49878 
##     415     416     417     418     419     420     421     422     423 
## 0.87375 0.98869 0.98869 0.70266 0.00950 0.88615 0.74694 0.00950 0.10239 
##     424     425     426     427     428     429     430     431     432 
## 0.02130 0.70248 0.17822 0.20876 0.03394 0.98520 0.70266 0.88615 0.20876 
##     433     434     435     436     437     438     439     440     441 
## 0.28252 0.52771 0.71826 0.74694 0.98869 0.88615 0.98520 0.38074 0.87375 
##     442     443     444     445     446     447     448     449     450 
## 0.02106 0.00950 0.98869 0.01006 0.98186 0.10239 0.02130 0.20876 0.74694 
##     451     452     453     454     455     456     457     458     459 
## 0.52771 0.38074 0.00950 0.17822 0.00146 0.17822 0.98186 0.52771 0.98520 
##     460     461     462     463     464     465     466     467     468 
## 0.20876 0.52771 0.99755 0.03394 0.52771 0.10239 0.49878 0.99755 0.74694 
##     469     470     471     472     473     474     475     476     477 
## 0.87375 0.02106 0.49878 0.17822 0.74694 0.00146 0.01006 0.99755 0.92392 
##     478     479     480     481     482     483     484     485     486 
## 0.52771 0.88615 0.74694 0.71826 0.87375 0.74694 0.98186 0.00950 0.70266 
##     487     488     489     490     491     492     493     494     495 
## 0.92392 0.49878 0.70248 0.87375 0.88615 0.87375 0.98869 0.71826 0.20876 
##     496     497     498     499     500     501     502     503     504 
## 0.70248 0.98520 0.02130 0.52771 0.00950 0.74694 0.88615 0.88615 0.01006 
##     505     506     507     508     509     510     511     512     513 
## 0.99755 0.99755 0.17822 0.98520 0.03394 0.00146 0.00950 0.17822 0.01006 
##     514     515     516     517     518     519     520     521     522 
## 0.20876 0.74694 0.17822 0.10239 0.38074 0.98186 0.87375 0.88615 0.70266 
##     523     524     525     526     527     528     529     530     531 
## 0.92392 0.00146 0.87375 0.52771 0.88615 0.10239 0.10239 0.17822 0.00950 
##     532     533     534     535     536     537     538     539     540 
## 0.02130 0.98186 0.02106 0.98869 0.03394 0.17822 0.52771 0.74694 0.52771 
##     541     542     543     544     545     546     547     548     549 
## 0.01006 0.03394 0.70266 0.28252 0.92392 0.49878 0.70266 0.74694 0.92392 
##     550     551     552     553     554     555     556     557     558 
## 0.00146 0.00146 0.87375 0.00950 0.02106 0.70266 0.99755 0.49878 0.00146 
##     559     560     561     562     563     564     565     566     567 
## 0.00950 0.92392 0.98869 0.01006 0.10239 0.17822 0.87375 0.87375 0.70266 
##     568     569     570     571     572     573     574     575     576 
## 0.88615 0.98869 0.70266 0.52771 0.02106 0.88615 0.70266 0.98186 0.71826 
##     577     578     579     580     581     582     583     584     585 
## 0.00950 0.00950 0.20876 0.00950 0.01006 0.38074 0.52771 0.00950 0.10239 
##     586     587     588     589     590     591     592     593     594 
## 0.00146 0.70266 0.88615 0.00950 0.87375 0.38074 0.03394 0.74694 0.87375 
##     595     596     597     598     599     600     601     602     603 
## 0.88615 0.49878 0.38074 0.70266 0.38074 0.99755 0.88615 0.02106 0.02130 
##     604     605     606     607     608     609     610     611     612 
## 0.98520 0.71826 0.49878 0.01006 0.02106 0.00950 0.01006 0.28252 0.03394 
##     613     614     615     616     617     618     619     620     621 
## 0.70266 0.98520 0.03394 0.99755 0.87375 0.03394 0.92392 0.88615 0.87375 
##     622     623     624     625     626     627     628     629     630 
## 0.98520 0.70266 0.28252 0.70248 0.98186 0.88615 0.02130 0.01006 0.03394 
##     631     632     633     634     635     636     637     638     639 
## 0.70266 0.92392 0.28252 0.28252 0.88615 0.87375 0.98869 0.00146 0.00146 
##     640     641     642     643     644     645     646     647     648 
## 0.98186 0.71826 0.28252 0.02130 0.88615 0.70266 0.74694 0.98869 0.02130 
##     649     650     651     652     653     654     655     656     657 
## 0.02106 0.87375 0.88615 0.70266 0.10239 0.00950 0.87375 0.92392 0.98186 
##     658     659     660     661     662     663     664     665     666 
## 0.98869 0.88615 0.70248 0.28252 0.02106 0.01006 0.92392 0.03394 0.17822 
##     667     668     669     670     671     672     673     674     675 
## 0.98520 0.00146 0.99755 0.28252 0.00146 0.99755 0.98186 0.17822 0.88615 
##     676     677     678     679     680     681     682     683     684 
## 0.98869 0.00950 0.88615 0.87375 0.17822 0.00146 0.71826 0.38074 0.98186 
##     685     686     687     688     689     690     691     692     693 
## 0.70248 0.20876 0.88615 0.02130 0.00146 0.71826 0.01006 0.99755 0.38074 
##     694     695     696     697     698     699     700     701     702 
## 0.03394 0.92392 0.10239 0.20876 0.00146 0.28252 0.20876 0.02106 0.02130 
##     703     704     705     706     707     708     709     710     711 
## 0.02106 0.00950 0.28252 0.00146 0.70248 0.28252 0.17822 0.10239 0.03394 
##     712     713     714     715     716     717     718     719     720 
## 0.00950 0.71826 0.52771 0.70266 0.71826 0.88615 0.01006 0.92392 0.28252 
##     721     722     723     724     725     726     727     728     729 
## 0.00146 0.92392 0.52771 0.49878 0.17822 0.02130 0.70248 0.92392 0.49878 
##     730     731     732     733     734     735     736     737     738 
## 0.74694 0.71826 0.38074 0.70266 0.88615 0.74694 0.49878 0.99755 0.98520 
##     739     740     741     742     743     744     745     746     747 
## 0.03394 0.38074 0.38074 0.00146 0.98869 0.02130 0.92392 0.98869 0.02106 
##     748     749     750     751     752     753     754     755     756 
## 0.20876 0.52771 0.70248 0.02106 0.02106 0.71826 0.49878 0.98869 0.98186 
##     757     758     759     760     761     762     763     764     765 
## 0.01006 0.70248 0.02130 0.38074 0.98520 0.92392 0.38074 0.92392 0.49878 
##     766     767     768     769     770     771     772     773     774 
## 0.00146 0.74694 0.03394 0.10239 0.98186 0.98869 0.02130 0.70248 0.52771 
##     775     776     777     778     779     780     781     782     783 
## 0.98520 0.52771 0.00950 0.74694 0.49878 0.70248 0.20876 0.28252 0.49878 
##     784     785     786     787     788     789     790     791     792 
## 0.87375 0.70248 0.88615 0.88615 0.28252 0.88615 0.01006 0.74694 0.87375 
##     793     794     795     796     797     798     799     800     801 
## 0.38074 0.02130 0.71826 0.70248 0.98869 0.20876 0.71826 0.70248 0.88615 
##     802     803     804     805     806     807     808     809     810 
## 0.28252 0.17822 0.74694 0.01006 0.98186 0.03394 0.70266 0.28252 0.38074 
##     811     812     813     814     815     816     817     818     819 
## 0.98869 0.98186 0.70248 0.20876 0.10239 0.38074 0.00950 0.98520 0.00950 
##     820     821     822     823     824     825     826     827     828 
## 0.52771 0.03394 0.38074 0.01006 0.70248 0.01006 0.02106 0.02130 0.17822 
##     829     830     831     832     833     834     835     836     837 
## 0.52771 0.88615 0.49878 0.70266 0.88615 0.71826 0.52771 0.28252 0.01006 
##     838     839     840     841     842     843     844     845     846 
## 0.02106 0.70248 0.98186 0.92392 0.01006 0.38074 0.17822 0.52771 0.98520 
##     847     848     849     850     851     852     853     854     855 
## 0.92392 0.98869 0.20876 0.99755 0.70266 0.03394 0.10239 0.01006 0.98869 
##     856     857     858     859     860     861     862     863     864 
## 0.71826 0.98186 0.70248 0.00146 0.01006 0.10239 0.49878 0.70248 0.38074 
##     865     866     867     868     869     870     871     872     873 
## 0.70266 0.70266 0.70248 0.01006 0.00146 0.70266 0.99755 0.99755 0.87375 
##     874     875     876     877     878     879     880     881     882 
## 0.74694 0.17822 0.74694 0.00950 0.00950 0.92392 0.98186 0.70248 0.28252 
##     883     884     885     886     887     888     889     890     891 
## 0.00950 0.74694 0.71826 0.02106 0.17822 0.70248 0.10239 0.02106 0.00146 
##     892     893     894     895     896     897     898     899     900 
## 0.92392 0.74694 0.28252 0.87375 0.00146 0.74694 0.87375 0.70248 0.00950 
##     901     902     903     904     905     906     907     908     909 
## 0.00950 0.00146 0.00146 0.71826 0.92392 0.70248 0.70248 0.87375 0.03394 
##     910     911     912     913     914     915     916     917     918 
## 0.98869 0.38074 0.98186 0.20876 0.01006 0.20876 0.20876 0.71826 0.98869 
##     919     920     921     922     923     924     925     926     927 
## 0.02130 0.87375 0.99755 0.28252 0.28252 0.52771 0.74694 0.98520 0.92392 
##     928     929     930     931     932     933     934     935     936 
## 0.98869 0.17822 0.28252 0.52771 0.20876 0.71826 0.00146 0.98186 0.02106 
##     937     938     939     940     941     942     943     944     945 
## 0.03394 0.00950 0.71826 0.28252 0.98520 0.01006 0.88615 0.20876 0.20876 
##     946     947     948     949     950     951     952     953     954 
## 0.00146 0.02106 0.98520 0.74694 0.70248 0.52771 0.98869 0.98186 0.02130 
##     955     956     957     958     959     960     961     962     963 
## 0.87375 0.92392 0.17822 0.00146 0.74694 0.74694 0.01006 0.28252 0.74694 
##     964     965     966     967     968     969     970     971     972 
## 0.98869 0.87375 0.00146 0.10239 0.02106 0.98186 0.98520 0.02130 0.49878 
##     973     974     975     976     977     978     979     980     981 
## 0.02106 0.20876 0.02106 0.00146 0.92392 0.02130 0.70248 0.17822 0.74694 
##     982     983     984     985     986     987     988     989     990 
## 0.03394 0.88615 0.98869 0.92392 0.49878 0.01006 0.17822 0.10239 0.74694 
##     991     992     993     994     995     996     997     998     999 
## 0.98869 0.70248 0.52771 0.98186 0.70248 0.87375 0.71826 0.88615 0.87375 
##    1000 
## 0.98520
                    # 3. Classify with a 0.5 cutoff. Pin the factor levels to
                    # those of the reference so confusionMatrix() cannot fail
                    # or misalign if one class were absent from the predictions
                    # (as.factor() would silently drop the missing level).
                    riding_pred <- factor(ifelse(riding_pred > 0.5,
                                                 "owner", "non-owner"),
                                          levels = levels(riding$Ownership))
                    # Non-owners classified correctly: 407 / 511 = 79.6%
                    # (the "Sensitivity" row, since 'non-owner' is positive).
                    confusionMatrix(riding_pred, riding$Ownership)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  non-owner owner
##   non-owner       407    85
##   owner           104   404
##                                              
##                Accuracy : 0.811              
##                  95% CI : (0.785, 0.835)     
##     No Information Rate : 0.511              
##     P-Value [Acc > NIR] : <0.0000000000000002
##                                              
##                   Kappa : 0.622              
##  Mcnemar's Test P-Value : 0.19               
##                                              
##             Sensitivity : 0.796              
##             Specificity : 0.826              
##          Pos Pred Value : 0.827              
##          Neg Pred Value : 0.795              
##              Prevalence : 0.511              
##          Detection Rate : 0.407              
##    Detection Prevalence : 0.492              
##       Balanced Accuracy : 0.811              
##                                              
##        'Positive' Class : non-owner          
## 
    # 4. To increase the percentage of correctly classified nonowners, should the 
    #    cutoff probability be increased or decreased?
    # 4. Raising the cutoff labels more households "non-owner", so the share
    #    of non-owners classified correctly increases (here 458/511 = 89.6%
    #    at 0.65 vs 407/511 = 79.6% at 0.5). Levels are pinned to the
    #    reference so confusionMatrix() is safe even if a class is absent.
    riding_pred <- predict(riding_log, newdata = riding, type = "response")
    riding_pred <- factor(ifelse(riding_pred > 0.65, "owner", "non-owner"),
                          levels = levels(riding$Ownership))
    confusionMatrix(riding_pred, riding$Ownership)
## Confusion Matrix and Statistics
## 
##            Reference
## Prediction  non-owner owner
##   non-owner       458    85
##   owner            53   404
##                                               
##                Accuracy : 0.862               
##                  95% CI : (0.839, 0.883)      
##     No Information Rate : 0.511               
##     P-Value [Acc > NIR] : < 0.0000000000000002
##                                               
##                   Kappa : 0.723               
##  Mcnemar's Test P-Value : 0.00832             
##                                               
##             Sensitivity : 0.896               
##             Specificity : 0.826               
##          Pos Pred Value : 0.843               
##          Neg Pred Value : 0.884               
##              Prevalence : 0.511               
##          Detection Rate : 0.458               
##    Detection Prevalence : 0.543               
##       Balanced Accuracy : 0.861               
##                                               
##        'Positive' Class : non-owner           
## 
              # 5. Probability and (log) odds of ownership for a household
              #    with $60K income and a 20,000 sqft lot.
              newcustomers <- data.frame(Income = 60, Lot_Size = 20)

              # Predicted probability of ownership (~0.491).
              prob_new <- predict(riding_log, newdata = newcustomers,
                                  type = "response")
              prob_new

              # Log odds = the linear predictor (~ -0.0356).
              logodds_new <- predict(riding_log, newdata = newcustomers,
                                     type = "link")
              logodds_new

              # Convert log odds back to a probability with the logistic CDF:
              # plogis(x) is the stats-library form of 1 / (1 + exp(-x)),
              # and recovers ~0.491.
              probs <- plogis(logodds_new)
              probs
      # 6. Classification of a household with $60K income and a 21,000 sqft
      #    lot, using a cutoff of 0.5 and then a cutoff of 0.75.
      newcustomers <- data.frame(Income = 60, Lot_Size = 21)

      # Predicted probability of ownership (~0.71).
      prob_21 <- predict(riding_log, newdata = newcustomers,
                         type = "response")
      prob_21

      # Cutoff 0.5: 0.71 > 0.5, so the household is classified "owner".
      ifelse(prob_21 > 0.5, "owner", "non-owner")

      # Cutoff 0.75: compare the PROBABILITY against the cutoff -- the
      # original code re-applied ifelse() to the character label "owner",
      # and "owner" > 0.75 coerces 0.75 to the string "0.75", comparing
      # lexically and always yielding "owner". With the probability used
      # correctly, 0.71 < 0.75, so the classification is "non-owner".
      ifelse(prob_21 > 0.75, "owner", "non-owner")
              # 7. Minimum income a household with a 16,000 sqft lot should
              #    have before it is classified as an owner: attach the
              #    fitted probabilities to the data, then scan the owners
              #    ordered by Income and Lot_Size.
              # NOTE(review): this inspects observed rows only; the exact
              # threshold at Lot_Size = 16 could also be solved from the
              # model coefficients -- confirm which the assignment expects.
              riding_pred <- predict(riding_log, newdata = riding,
                                     type = "response")
              riding$riding_pred <- riding_pred

              riding %>%
                filter(Ownership == "owner") %>%
                arrange(Income, Lot_Size)
##         X Income Lot_Size Ownership riding_pred
## 1   11.00   51.0     22.0     owner       0.718
## 2   11.10   51.0     22.0     owner       0.718
## 3   11.20   51.0     22.0     owner       0.718
## 4   11.30   51.0     22.0     owner       0.718
## 5   11.40   51.0     22.0     owner       0.718
## 6   11.50   51.0     22.0     owner       0.718
## 7   11.60   51.0     22.0     owner       0.718
## 8   11.70   51.0     22.0     owner       0.718
## 9   11.80   51.0     22.0     owner       0.718
## 10  11.90   51.0     22.0     owner       0.718
## 11  11.10   51.0     22.0     owner       0.718
## 12  11.11   51.0     22.0     owner       0.718
## 13  11.12   51.0     22.0     owner       0.718
## 14  11.13   51.0     22.0     owner       0.718
## 15  11.14   51.0     22.0     owner       0.718
## 16  11.15   51.0     22.0     owner       0.718
## 17  11.16   51.0     22.0     owner       0.718
## 18  11.17   51.0     22.0     owner       0.718
## 19  11.18   51.0     22.0     owner       0.718
## 20  11.19   51.0     22.0     owner       0.718
## 21  11.20   51.0     22.0     owner       0.718
## 22  11.21   51.0     22.0     owner       0.718
## 23  11.22   51.0     22.0     owner       0.718
## 24  11.23   51.0     22.0     owner       0.718
## 25  11.24   51.0     22.0     owner       0.718
## 26  11.25   51.0     22.0     owner       0.718
## 27  11.26   51.0     22.0     owner       0.718
## 28  11.27   51.0     22.0     owner       0.718
## 29  11.28   51.0     22.0     owner       0.718
## 30  11.29   51.0     22.0     owner       0.718
## 31  11.30   51.0     22.0     owner       0.718
## 32  11.31   51.0     22.0     owner       0.718
## 33  11.32   51.0     22.0     owner       0.718
## 34  11.33   51.0     22.0     owner       0.718
## 35  11.34   51.0     22.0     owner       0.718
## 36  11.35   51.0     22.0     owner       0.718
## 37  11.36   51.0     22.0     owner       0.718
## 38  11.37   51.0     22.0     owner       0.718
## 39   1.00   60.0     18.4     owner       0.178
## 40   1.10   60.0     18.4     owner       0.178
## 41   1.20   60.0     18.4     owner       0.178
## 42   1.30   60.0     18.4     owner       0.178
## 43   1.40   60.0     18.4     owner       0.178
## 44   1.50   60.0     18.4     owner       0.178
## 45   1.60   60.0     18.4     owner       0.178
## 46   1.70   60.0     18.4     owner       0.178
## 47   1.80   60.0     18.4     owner       0.178
## 48   1.90   60.0     18.4     owner       0.178
## 49   1.10   60.0     18.4     owner       0.178
## 50   1.11   60.0     18.4     owner       0.178
## 51   1.12   60.0     18.4     owner       0.178
## 52   1.13   60.0     18.4     owner       0.178
## 53   1.14   60.0     18.4     owner       0.178
## 54   1.15   60.0     18.4     owner       0.178
## 55   1.16   60.0     18.4     owner       0.178
## 56   1.17   60.0     18.4     owner       0.178
## 57   1.18   60.0     18.4     owner       0.178
## 58   1.19   60.0     18.4     owner       0.178
## 59   1.20   60.0     18.4     owner       0.178
## 60   1.21   60.0     18.4     owner       0.178
## 61   1.22   60.0     18.4     owner       0.178
## 62   1.23   60.0     18.4     owner       0.178
## 63   1.24   60.0     18.4     owner       0.178
## 64   1.25   60.0     18.4     owner       0.178
## 65   1.26   60.0     18.4     owner       0.178
## 66   1.27   60.0     18.4     owner       0.178
## 67   1.28   60.0     18.4     owner       0.178
## 68   1.29   60.0     18.4     owner       0.178
## 69   1.30   60.0     18.4     owner       0.178
## 70   1.31   60.0     18.4     owner       0.178
## 71   1.32   60.0     18.4     owner       0.178
## 72   1.33   60.0     18.4     owner       0.178
## 73   1.34   60.0     18.4     owner       0.178
## 74   1.35   60.0     18.4     owner       0.178
## 75   1.36   60.0     18.4     owner       0.178
## 76   1.37   60.0     18.4     owner       0.178
## 77   1.38   60.0     18.4     owner       0.178
## 78   1.39   60.0     18.4     owner       0.178
## 79   1.40   60.0     18.4     owner       0.178
## 80   1.41   60.0     18.4     owner       0.178
## 81   1.42   60.0     18.4     owner       0.178
## 82   1.43   60.0     18.4     owner       0.178
## 83   1.44   60.0     18.4     owner       0.178
## 84   1.45   60.0     18.4     owner       0.178
## 85   1.46   60.0     18.4     owner       0.178
## 86   1.47   60.0     18.4     owner       0.178
## 87   4.00   61.5     20.8     owner       0.703
## 88   4.10   61.5     20.8     owner       0.703
## 89   4.20   61.5     20.8     owner       0.703
## 90   4.30   61.5     20.8     owner       0.703
## 91   4.40   61.5     20.8     owner       0.703
## 92   4.50   61.5     20.8     owner       0.703
## 93   4.60   61.5     20.8     owner       0.703
## 94   4.70   61.5     20.8     owner       0.703
## 95   4.80   61.5     20.8     owner       0.703
## 96   4.90   61.5     20.8     owner       0.703
## 97   4.10   61.5     20.8     owner       0.703
## 98   4.11   61.5     20.8     owner       0.703
## 99   4.12   61.5     20.8     owner       0.703
## 100  4.13   61.5     20.8     owner       0.703
## 101  4.14   61.5     20.8     owner       0.703
## 102  4.15   61.5     20.8     owner       0.703
## 103  4.16   61.5     20.8     owner       0.703
## 104  4.17   61.5     20.8     owner       0.703
## 105  4.18   61.5     20.8     owner       0.703
## 106  4.19   61.5     20.8     owner       0.703
## 107  4.20   61.5     20.8     owner       0.703
## 108  4.21   61.5     20.8     owner       0.703
## 109  4.22   61.5     20.8     owner       0.703
## 110  4.23   61.5     20.8     owner       0.703
## 111  4.24   61.5     20.8     owner       0.703
## 112  4.25   61.5     20.8     owner       0.703
## 113  4.26   61.5     20.8     owner       0.703
## 114  4.27   61.5     20.8     owner       0.703
## 115  4.28   61.5     20.8     owner       0.703
## 116  4.29   61.5     20.8     owner       0.703
## 117  4.30   61.5     20.8     owner       0.703
## 118  4.31   61.5     20.8     owner       0.703
## 119  4.32   61.5     20.8     owner       0.703
## 120  4.33   61.5     20.8     owner       0.703
## 121  4.34   61.5     20.8     owner       0.703
## 122  4.35   61.5     20.8     owner       0.703
## 123  4.36   61.5     20.8     owner       0.703
## 124  4.37   61.5     20.8     owner       0.703
## 125  4.38   61.5     20.8     owner       0.703
## 126  4.39   61.5     20.8     owner       0.703
## 127  4.40   61.5     20.8     owner       0.703
## 128  4.41   61.5     20.8     owner       0.703
## 129  3.00   64.8     21.6     owner       0.874
## 130  3.10   64.8     21.6     owner       0.874
## 131  3.20   64.8     21.6     owner       0.874
## 132  3.30   64.8     21.6     owner       0.874
## 133  3.40   64.8     21.6     owner       0.874
## 134  3.50   64.8     21.6     owner       0.874
## 135  3.60   64.8     21.6     owner       0.874
## 136  3.70   64.8     21.6     owner       0.874
## 137  3.80   64.8     21.6     owner       0.874
## 138  3.90   64.8     21.6     owner       0.874
## 139  3.10   64.8     21.6     owner       0.874
## 140  3.11   64.8     21.6     owner       0.874
## 141  3.12   64.8     21.6     owner       0.874
## 142  3.13   64.8     21.6     owner       0.874
## 143  3.14   64.8     21.6     owner       0.874
## 144  3.15   64.8     21.6     owner       0.874
## 145  3.16   64.8     21.6     owner       0.874
## 146  3.17   64.8     21.6     owner       0.874
## 147  3.18   64.8     21.6     owner       0.874
## 148  3.19   64.8     21.6     owner       0.874
## 149  3.20   64.8     21.6     owner       0.874
## 150  3.21   64.8     21.6     owner       0.874
## 151  3.22   64.8     21.6     owner       0.874
## 152  3.23   64.8     21.6     owner       0.874
## 153  3.24   64.8     21.6     owner       0.874
## 154  3.25   64.8     21.6     owner       0.874
## 155  3.26   64.8     21.6     owner       0.874
## 156  3.27   64.8     21.6     owner       0.874
## 157  3.28   64.8     21.6     owner       0.874
## 158  3.29   64.8     21.6     owner       0.874
## 159  3.30   64.8     21.6     owner       0.874
## 160  3.31   64.8     21.6     owner       0.874
## 161  3.32   64.8     21.6     owner       0.874
## 162  3.33   64.8     21.6     owner       0.874
## 163  3.34   64.8     21.6     owner       0.874
## 164  3.35   64.8     21.6     owner       0.874
## 165  3.36   64.8     21.6     owner       0.874
## 166  3.37   64.8     21.6     owner       0.874
## 167  3.38   64.8     21.6     owner       0.874
## 168  3.39   64.8     21.6     owner       0.874
## 169  9.00   69.0     20.0     owner       0.702
## 170  9.10   69.0     20.0     owner       0.702
## 171  9.20   69.0     20.0     owner       0.702
## 172  9.30   69.0     20.0     owner       0.702
## 173  9.40   69.0     20.0     owner       0.702
## 174  9.50   69.0     20.0     owner       0.702
## 175  9.60   69.0     20.0     owner       0.702
## 176  9.70   69.0     20.0     owner       0.702
## 177  9.80   69.0     20.0     owner       0.702
## 178  9.90   69.0     20.0     owner       0.702
## 179  9.10   69.0     20.0     owner       0.702
## 180  9.11   69.0     20.0     owner       0.702
## 181  9.12   69.0     20.0     owner       0.702
## 182  9.13   69.0     20.0     owner       0.702
## 183  9.14   69.0     20.0     owner       0.702
## 184  9.15   69.0     20.0     owner       0.702
## 185  9.16   69.0     20.0     owner       0.702
## 186  9.17   69.0     20.0     owner       0.702
## 187  9.18   69.0     20.0     owner       0.702
## 188  9.19   69.0     20.0     owner       0.702
## 189  9.20   69.0     20.0     owner       0.702
## 190  9.21   69.0     20.0     owner       0.702
## 191  9.22   69.0     20.0     owner       0.702
## 192  9.23   69.0     20.0     owner       0.702
## 193  9.24   69.0     20.0     owner       0.702
## 194  9.25   69.0     20.0     owner       0.702
## 195  9.26   69.0     20.0     owner       0.702
## 196  9.27   69.0     20.0     owner       0.702
## 197  9.28   69.0     20.0     owner       0.702
## 198  9.29   69.0     20.0     owner       0.702
## 199  9.30   69.0     20.0     owner       0.702
## 200  9.31   69.0     20.0     owner       0.702
## 201  9.32   69.0     20.0     owner       0.702
## 202  9.33   69.0     20.0     owner       0.702
## 203  9.34   69.0     20.0     owner       0.702
## 204  9.35   69.0     20.0     owner       0.702
## 205  9.36   69.0     20.0     owner       0.702
## 206  9.37   69.0     20.0     owner       0.702
## 207  9.38   69.0     20.0     owner       0.702
## 208  9.39   69.0     20.0     owner       0.702
## 209  9.40   69.0     20.0     owner       0.702
## 210  9.41   69.0     20.0     owner       0.702
## 211  9.42   69.0     20.0     owner       0.702
## 212  9.43   69.0     20.0     owner       0.702
## 213  9.44   69.0     20.0     owner       0.702
## 214 12.00   81.0     20.0     owner       0.886
## 215 12.10   81.0     20.0     owner       0.886
## 216 12.20   81.0     20.0     owner       0.886
## 217 12.30   81.0     20.0     owner       0.886
## 218 12.40   81.0     20.0     owner       0.886
## 219 12.50   81.0     20.0     owner       0.886
## 220 12.60   81.0     20.0     owner       0.886
## 221 12.70   81.0     20.0     owner       0.886
## 222 12.80   81.0     20.0     owner       0.886
## 223 12.90   81.0     20.0     owner       0.886
## 224 12.10   81.0     20.0     owner       0.886
## 225 12.11   81.0     20.0     owner       0.886
## 226 12.12   81.0     20.0     owner       0.886
## 227 12.13   81.0     20.0     owner       0.886
## 228 12.14   81.0     20.0     owner       0.886
## 229 12.15   81.0     20.0     owner       0.886
## 230 12.16   81.0     20.0     owner       0.886
## 231 12.17   81.0     20.0     owner       0.886
## 232 12.18   81.0     20.0     owner       0.886
## 233 12.19   81.0     20.0     owner       0.886
## 234 12.20   81.0     20.0     owner       0.886
## 235 12.21   81.0     20.0     owner       0.886
## 236 12.22   81.0     20.0     owner       0.886
## 237 12.23   81.0     20.0     owner       0.886
## 238 12.24   81.0     20.0     owner       0.886
## 239 12.25   81.0     20.0     owner       0.886
## 240 12.26   81.0     20.0     owner       0.886
## 241 12.27   81.0     20.0     owner       0.886
## 242 12.28   81.0     20.0     owner       0.886
## 243 12.29   81.0     20.0     owner       0.886
## 244 12.30   81.0     20.0     owner       0.886
## 245 12.31   81.0     20.0     owner       0.886
## 246 12.32   81.0     20.0     owner       0.886
## 247 12.33   81.0     20.0     owner       0.886
## 248 12.34   81.0     20.0     owner       0.886
## 249 12.35   81.0     20.0     owner       0.886
## 250 12.36   81.0     20.0     owner       0.886
## 251 12.37   81.0     20.0     owner       0.886
## 252 12.38   81.0     20.0     owner       0.886
## 253 12.39   81.0     20.0     owner       0.886
## 254 12.40   81.0     20.0     owner       0.886
## 255 12.41   81.0     20.0     owner       0.886
## 256 12.42   81.0     20.0     owner       0.886
## 257 12.43   81.0     20.0     owner       0.886
## 258  8.00   82.8     22.4     owner       0.989
## 259  8.10   82.8     22.4     owner       0.989
## 260  8.20   82.8     22.4     owner       0.989
## 261  8.30   82.8     22.4     owner       0.989
## 262  8.40   82.8     22.4     owner       0.989
## 263  8.50   82.8     22.4     owner       0.989
## 264  8.60   82.8     22.4     owner       0.989
## 265  8.70   82.8     22.4     owner       0.989
## 266  8.80   82.8     22.4     owner       0.989
## 267  8.90   82.8     22.4     owner       0.989
## 268  8.10   82.8     22.4     owner       0.989
## 269  8.11   82.8     22.4     owner       0.989
## 270  8.12   82.8     22.4     owner       0.989
## 271  8.13   82.8     22.4     owner       0.989
## 272  8.14   82.8     22.4     owner       0.989
## 273  8.15   82.8     22.4     owner       0.989
## 274  8.16   82.8     22.4     owner       0.989
## 275  8.17   82.8     22.4     owner       0.989
## 276  8.18   82.8     22.4     owner       0.989
## 277  8.19   82.8     22.4     owner       0.989
## 278  8.20   82.8     22.4     owner       0.989
## 279  8.21   82.8     22.4     owner       0.989
## 280  8.22   82.8     22.4     owner       0.989
## 281  8.23   82.8     22.4     owner       0.989
## 282  8.24   82.8     22.4     owner       0.989
## 283  8.25   82.8     22.4     owner       0.989
## 284  8.26   82.8     22.4     owner       0.989
## 285  8.27   82.8     22.4     owner       0.989
## 286  8.28   82.8     22.4     owner       0.989
## 287  8.29   82.8     22.4     owner       0.989
## 288  8.30   82.8     22.4     owner       0.989
## 289  8.31   82.8     22.4     owner       0.989
## 290  8.32   82.8     22.4     owner       0.989
## 291  8.33   82.8     22.4     owner       0.989
## 292  8.34   82.8     22.4     owner       0.989
## 293  8.35   82.8     22.4     owner       0.989
## 294  8.36   82.8     22.4     owner       0.989
## 295  8.37   82.8     22.4     owner       0.989
## 296  8.38   82.8     22.4     owner       0.989
## 297  8.39   82.8     22.4     owner       0.989
## 298  8.40   82.8     22.4     owner       0.989
## 299  2.00   85.5     16.8     owner       0.381
## 300  2.10   85.5     16.8     owner       0.381
## 301  2.20   85.5     16.8     owner       0.381
## 302  2.30   85.5     16.8     owner       0.381
## 303  2.40   85.5     16.8     owner       0.381
## 304  2.50   85.5     16.8     owner       0.381
## 305  2.60   85.5     16.8     owner       0.381
## 306  2.70   85.5     16.8     owner       0.381
## 307  2.80   85.5     16.8     owner       0.381
## 308  2.90   85.5     16.8     owner       0.381
## 309  2.10   85.5     16.8     owner       0.381
## 310  2.11   85.5     16.8     owner       0.381
## 311  2.12   85.5     16.8     owner       0.381
## 312  2.13   85.5     16.8     owner       0.381
## 313  2.14   85.5     16.8     owner       0.381
## 314  2.15   85.5     16.8     owner       0.381
## 315  2.16   85.5     16.8     owner       0.381
## 316  2.17   85.5     16.8     owner       0.381
## 317  2.18   85.5     16.8     owner       0.381
## 318  2.19   85.5     16.8     owner       0.381
## 319  2.20   85.5     16.8     owner       0.381
## 320  2.21   85.5     16.8     owner       0.381
## 321  2.22   85.5     16.8     owner       0.381
## 322  2.23   85.5     16.8     owner       0.381
## 323  2.24   85.5     16.8     owner       0.381
## 324  2.25   85.5     16.8     owner       0.381
## 325  2.26   85.5     16.8     owner       0.381
## 326  2.27   85.5     16.8     owner       0.381
## 327  2.28   85.5     16.8     owner       0.381
## 328  2.29   85.5     16.8     owner       0.381
## 329  2.30   85.5     16.8     owner       0.381
## 330  2.31   85.5     16.8     owner       0.381
## 331  2.32   85.5     16.8     owner       0.381
## 332  2.33   85.5     16.8     owner       0.381
## 333  2.34   85.5     16.8     owner       0.381
## 334  2.35   85.5     16.8     owner       0.381
## 335  2.36   85.5     16.8     owner       0.381
## 336  5.00   87.0     23.6     owner       0.998
## 337  5.10   87.0     23.6     owner       0.998
## 338  5.20   87.0     23.6     owner       0.998
## 339  5.30   87.0     23.6     owner       0.998
## 340  5.40   87.0     23.6     owner       0.998
## 341  5.50   87.0     23.6     owner       0.998
## 342  5.60   87.0     23.6     owner       0.998
## 343  5.70   87.0     23.6     owner       0.998
## 344  5.80   87.0     23.6     owner       0.998
## 345  5.90   87.0     23.6     owner       0.998
## 346  5.10   87.0     23.6     owner       0.998
## 347  5.11   87.0     23.6     owner       0.998
## 348  5.12   87.0     23.6     owner       0.998
## 349  5.13   87.0     23.6     owner       0.998
## 350  5.14   87.0     23.6     owner       0.998
## 351  5.15   87.0     23.6     owner       0.998
## 352  5.16   87.0     23.6     owner       0.998
## 353  5.17   87.0     23.6     owner       0.998
## 354  5.18   87.0     23.6     owner       0.998
## 355  5.19   87.0     23.6     owner       0.998
## 356  5.20   87.0     23.6     owner       0.998
## 357  5.21   87.0     23.6     owner       0.998
## 358  5.22   87.0     23.6     owner       0.998
## 359  5.23   87.0     23.6     owner       0.998
## 360  5.24   87.0     23.6     owner       0.998
## 361 10.00   93.0     20.8     owner       0.982
## 362 10.10   93.0     20.8     owner       0.982
## 363 10.20   93.0     20.8     owner       0.982
## 364 10.30   93.0     20.8     owner       0.982
## 365 10.40   93.0     20.8     owner       0.982
## 366 10.50   93.0     20.8     owner       0.982
## 367 10.60   93.0     20.8     owner       0.982
## 368 10.70   93.0     20.8     owner       0.982
## 369 10.80   93.0     20.8     owner       0.982
## 370 10.90   93.0     20.8     owner       0.982
## 371 10.10   93.0     20.8     owner       0.982
## 372 10.11   93.0     20.8     owner       0.982
## 373 10.12   93.0     20.8     owner       0.982
## 374 10.13   93.0     20.8     owner       0.982
## 375 10.14   93.0     20.8     owner       0.982
## 376 10.15   93.0     20.8     owner       0.982
## 377 10.16   93.0     20.8     owner       0.982
## 378 10.17   93.0     20.8     owner       0.982
## 379 10.18   93.0     20.8     owner       0.982
## 380 10.19   93.0     20.8     owner       0.982
## 381 10.20   93.0     20.8     owner       0.982
## 382 10.21   93.0     20.8     owner       0.982
## 383 10.22   93.0     20.8     owner       0.982
## 384 10.23   93.0     20.8     owner       0.982
## 385 10.24   93.0     20.8     owner       0.982
## 386 10.25   93.0     20.8     owner       0.982
## 387 10.26   93.0     20.8     owner       0.982
## 388 10.27   93.0     20.8     owner       0.982
## 389 10.28   93.0     20.8     owner       0.982
## 390 10.29   93.0     20.8     owner       0.982
## 391 10.30   93.0     20.8     owner       0.982
## 392 10.31   93.0     20.8     owner       0.982
## 393 10.32   93.0     20.8     owner       0.982
## 394 10.33   93.0     20.8     owner       0.982
## 395 10.34   93.0     20.8     owner       0.982
## 396 10.35   93.0     20.8     owner       0.982
## 397 10.36   93.0     20.8     owner       0.982
## 398 10.37   93.0     20.8     owner       0.982
## 399 10.38   93.0     20.8     owner       0.982
## 400  7.00  108.0     17.6     owner       0.924
## 401  7.10  108.0     17.6     owner       0.924
## 402  7.20  108.0     17.6     owner       0.924
## 403  7.30  108.0     17.6     owner       0.924
## 404  7.40  108.0     17.6     owner       0.924
## 405  7.50  108.0     17.6     owner       0.924
## 406  7.60  108.0     17.6     owner       0.924
## 407  7.70  108.0     17.6     owner       0.924
## 408  7.80  108.0     17.6     owner       0.924
## 409  7.90  108.0     17.6     owner       0.924
## 410  7.10  108.0     17.6     owner       0.924
## 411  7.11  108.0     17.6     owner       0.924
## 412  7.12  108.0     17.6     owner       0.924
## 413  7.13  108.0     17.6     owner       0.924
## 414  7.14  108.0     17.6     owner       0.924
## 415  7.15  108.0     17.6     owner       0.924
## 416  7.16  108.0     17.6     owner       0.924
## 417  7.17  108.0     17.6     owner       0.924
## 418  7.18  108.0     17.6     owner       0.924
## 419  7.19  108.0     17.6     owner       0.924
## 420  7.20  108.0     17.6     owner       0.924
## 421  7.21  108.0     17.6     owner       0.924
## 422  7.22  108.0     17.6     owner       0.924
## 423  7.23  108.0     17.6     owner       0.924
## 424  7.24  108.0     17.6     owner       0.924
## 425  7.25  108.0     17.6     owner       0.924
## 426  7.26  108.0     17.6     owner       0.924
## 427  7.27  108.0     17.6     owner       0.924
## 428  7.28  108.0     17.6     owner       0.924
## 429  7.29  108.0     17.6     owner       0.924
## 430  7.30  108.0     17.6     owner       0.924
## 431  7.31  108.0     17.6     owner       0.924
## 432  7.32  108.0     17.6     owner       0.924
## 433  7.33  108.0     17.6     owner       0.924
## 434  7.34  108.0     17.6     owner       0.924
## 435  7.35  108.0     17.6     owner       0.924
## 436  7.36  108.0     17.6     owner       0.924
## 437  7.37  108.0     17.6     owner       0.924
## 438  7.38  108.0     17.6     owner       0.924
## 439  7.39  108.0     17.6     owner       0.924
## 440  7.40  108.0     17.6     owner       0.924
## 441  7.41  108.0     17.6     owner       0.924
## 442  7.42  108.0     17.6     owner       0.924
## 443  7.43  108.0     17.6     owner       0.924
## 444  7.44  108.0     17.6     owner       0.924
## 445  7.45  108.0     17.6     owner       0.924
## 446  7.46  108.0     17.6     owner       0.924
## 447  7.47  108.0     17.6     owner       0.924
## 448  7.48  108.0     17.6     owner       0.924
## 449  7.49  108.0     17.6     owner       0.924
## 450  7.50  108.0     17.6     owner       0.924
## 451  7.51  108.0     17.6     owner       0.924
## 452  6.00  110.1     19.2     owner       0.985
## 453  6.10  110.1     19.2     owner       0.985
## 454  6.20  110.1     19.2     owner       0.985
## 455  6.30  110.1     19.2     owner       0.985
## 456  6.40  110.1     19.2     owner       0.985
## 457  6.50  110.1     19.2     owner       0.985
## 458  6.60  110.1     19.2     owner       0.985
## 459  6.70  110.1     19.2     owner       0.985
## 460  6.80  110.1     19.2     owner       0.985
## 461  6.90  110.1     19.2     owner       0.985
## 462  6.10  110.1     19.2     owner       0.985
## 463  6.11  110.1     19.2     owner       0.985
## 464  6.12  110.1     19.2     owner       0.985
## 465  6.13  110.1     19.2     owner       0.985
## 466  6.14  110.1     19.2     owner       0.985
## 467  6.15  110.1     19.2     owner       0.985
## 468  6.16  110.1     19.2     owner       0.985
## 469  6.17  110.1     19.2     owner       0.985
## 470  6.18  110.1     19.2     owner       0.985
## 471  6.19  110.1     19.2     owner       0.985
## 472  6.20  110.1     19.2     owner       0.985
## 473  6.21  110.1     19.2     owner       0.985
## 474  6.22  110.1     19.2     owner       0.985
## 475  6.23  110.1     19.2     owner       0.985
## 476  6.24  110.1     19.2     owner       0.985
## 477  6.25  110.1     19.2     owner       0.985
## 478  6.26  110.1     19.2     owner       0.985
## 479  6.27  110.1     19.2     owner       0.985
## 480  6.28  110.1     19.2     owner       0.985
## 481  6.29  110.1     19.2     owner       0.985
## 482  6.30  110.1     19.2     owner       0.985
## 483  6.31  110.1     19.2     owner       0.985
## 484  6.32  110.1     19.2     owner       0.985
## 485  6.33  110.1     19.2     owner       0.985
## 486  6.34  110.1     19.2     owner       0.985
## 487  6.35  110.1     19.2     owner       0.985
## 488  6.36  110.1     19.2     owner       0.985
## 489  6.37  110.1     19.2     owner       0.985
              # Minimum income among owners whose lot size is at least
              # 16 (in 1000 sqft): sort owners by Income, then take the
              # smallest Income value by name (clearer than min_income[2]).
              min_income <- riding %>%
                filter(Ownership == "owner" & Lot_Size >= 16) %>%
                arrange(Income, Lot_Size)
              min(min_income$Income)
## [1] 51
              # Also: show a graph displaying the predicted probability of
              # Ownership vs. Income, with Lot_Size held fixed at 16.
              plot_model(riding_log, type = "pred", terms = c("Income", "Lot_Size[16]"))

    #==========================================================================#
    # Part II
    # Identifying Good System Administrators
    
    # A management consultant is studying the roles played by experience and
    # training in a system administrator's ability to complete a set of tasks
    # in a specified amount of time.  
    
    # In particular, she is interested in discriminating between administrators
    # who are able to complete given tasks within a specified amount of time
    # and those who are not.  Data on the performance of 75 randomly selected
    # administrators are stored in the file SystemAdministrators.csv.  
              
    # Experience: months of full-time system administrator experience;
    # Training: number of relevant training credits;
    # Completed: Yes or No depending on whether the administrator completed the
    #   tasks.
    
    ##################################################################
              # Start Part II with a clean workspace; list the working
              # directory to confirm the data file is present.
              remove(list = ls())
              dir()
## [1] "LogisticRegressionProject.R"            
## [2] "LogisticRegressionProject_AROD.knit.md" 
## [3] "LogisticRegressionProject_AROD.R"       
## [4] "LogisticRegressionProject_AROD.spin.R"  
## [5] "LogisticRegressionProject_AROD.spin.Rmd"
## [6] "RidingMowers.csv"                       
## [7] "RidingMowers_REV.csv"                   
## [8] "RidingMowersLargeSet.csv"               
## [9] "SystemAdministrators.xls"
              library(readxl)
              library(caret)
              library(tidyverse)

              # Use the file SystemAdministrators.xls and read it using
              # the function read_excel(); the records live on the "data" sheet.
              sa <- read_excel("SystemAdministrators.xls", sheet = "data")
              head(sa)
## # A tibble: 6 x 3
##   Experience Training Completed
##        <dbl>    <dbl> <chr>    
## 1       10.9        4 Yes      
## 2        9.9        4 Yes      
## 3       10.4        6 Yes      
## 4       13.7        6 Yes      
## 5        9.4        8 Yes      
## 6       12.4        4 Yes
              str(sa)
## Classes 'tbl_df', 'tbl' and 'data.frame':    75 obs. of  3 variables:
##  $ Experience: num  10.9 9.9 10.4 13.7 9.4 12.4 7.9 8.9 10.2 11.4 ...
##  $ Training  : num  4 4 6 6 8 4 6 4 6 4 ...
##  $ Completed : chr  "Yes" "Yes" "Yes" "Yes" ...
              # Exploratory Data Analysis
              # Tally the outcome variable
              tally(~ Completed, data = sa)
## Completed
##  No Yes 
##  60  15
              # Show a boxplot of Training and the Outcome variable
              ggplot(sa, aes(x = Completed, y = Training)) +
                geom_boxplot()

              # Show a boxplot of Experience and the Outcome variable
              ggplot(sa, aes(x = Completed, y = Experience)) +
                geom_boxplot()

              # Create a scatter plot of Experience vs. Training using colors
              # or symbols to distinguish administrators who completed the task
              # from those who didn't.  Which predictor(s) appear(s) potentially
              # useful for classifying task completion?
              ggplot(sa, aes(y = Experience, x = Training)) +
                geom_point(aes(col = Completed))

              # Run a logistic regression model with both predictors using the
              # entire dataset as training data.  Note: recall that the outcome
              # variable must be a factor for classification.
              sa$Completed <- as.factor(sa$Completed)
              sa_logit <- glm(Completed ~ ., data = sa, family = "binomial")
              summary(sa_logit)
## 
## Call:
## glm(formula = Completed ~ ., family = "binomial", data = sa)
## 
## Deviance Residuals: 
##    Min      1Q  Median      3Q     Max  
## -2.653  -0.350  -0.175  -0.082   2.218  
## 
## Coefficients:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -10.981      2.892   -3.80  0.00015 ***
## Experience     1.127      0.291    3.87  0.00011 ***
## Training       0.181      0.339    0.53  0.59397    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 75.060  on 74  degrees of freedom
## Residual deviance: 35.713  on 72  degrees of freedom
## AIC: 41.71
## 
## Number of Fisher Scoring iterations: 6
              # Among those who completed the task, what is the percentage of
              # programmers incorrectly classified as failing to complete it?
              # First get the fitted probabilities for every administrator.
              sa_pred <- predict(sa_logit, newdata = sa, type = "response")
              sa_pred
##        1        2        3        4        5        6        7        8 
## 0.883323 0.710404 0.860786 0.996092 0.741910 0.976216 0.269828 0.442855 
##        9       10       11       12       13       14       15       16 
## 0.831521 0.930069 0.361776 0.527097 0.974614 0.155173 0.085430 0.008686 
##       17       18       19       20       21       22       23       24 
## 0.130444 0.009712 0.007768 0.004963 0.003965 0.040715 0.006945 0.012138 
##       25       26       27       28       29       30       31       32 
## 0.442855 0.024124 0.015160 0.021129 0.170527 0.000734 0.062454 0.051599 
##       33       34       35       36       37       38       39       40 
## 0.078674 0.002021 0.009712 0.015160 0.158204 0.155173 0.448522 0.970381 
##       41       42       43       44       45       46       47       48 
## 0.023590 0.032768 0.024124 0.003544 0.011362 0.187063 0.085430 0.004436 
##       49       50       51       52       53       54       55       56 
## 0.032768 0.008686 0.077027 0.012138 0.069387 0.023590 0.006210 0.104765 
##       57       58       59       60       61       62       63       64 
## 0.012138 0.010858 0.527097 0.006945 0.037350 0.223772 0.003168 0.040715 
##       65       66       67       68       69       70       71       72 
## 0.056172 0.050488 0.231842 0.050488 0.265332 0.026330 0.018920 0.052733 
##       73       74       75 
## 0.063811 0.002530 0.013566
              # Classify with a 0.5 cutoff.  Pin the factor levels to those of
              # the reference so confusionMatrix() still works even if one
              # class is never predicted (as.factor would drop that level).
              sa_class <- factor(ifelse(sa_pred >= 0.5, "Yes", "No"),
                                 levels = levels(sa$Completed))
              confusionMatrix(sa_class, sa$Completed)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No  58   5
##        Yes  2  10
##                                         
##                Accuracy : 0.907         
##                  95% CI : (0.817, 0.962)
##     No Information Rate : 0.8           
##     P-Value [Acc > NIR] : 0.0104        
##                                         
##                   Kappa : 0.685         
##  Mcnemar's Test P-Value : 0.4497        
##                                         
##             Sensitivity : 0.967         
##             Specificity : 0.667         
##          Pos Pred Value : 0.921         
##          Neg Pred Value : 0.833         
##              Prevalence : 0.800         
##          Detection Rate : 0.773         
##    Detection Prevalence : 0.840         
##       Balanced Accuracy : 0.817         
##                                         
##        'Positive' Class : No            
## 
              # To decrease the percentage in the answer above, should the
              # cutoff probability be increased or decreased?  Lowering it
              # (here to 0.35) flags more cases as "Yes", so fewer completers
              # are misclassified as "No".  Levels are pinned to the reference
              # so confusionMatrix() is safe even if one class is never predicted.
              sa_pred <- predict(sa_logit, newdata = sa, type = "response")
              sa_class <- factor(ifelse(sa_pred >= 0.35, "Yes", "No"),
                                 levels = levels(sa$Completed))
              confusionMatrix(sa_class, sa$Completed)
## Confusion Matrix and Statistics
## 
##           Reference
## Prediction No Yes
##        No  56   3
##        Yes  4  12
##                                         
##                Accuracy : 0.907         
##                  95% CI : (0.817, 0.962)
##     No Information Rate : 0.8           
##     P-Value [Acc > NIR] : 0.0104        
##                                         
##                   Kappa : 0.715         
##  Mcnemar's Test P-Value : 1.0000        
##                                         
##             Sensitivity : 0.933         
##             Specificity : 0.800         
##          Pos Pred Value : 0.949         
##          Neg Pred Value : 0.750         
##              Prevalence : 0.800         
##          Detection Rate : 0.747         
##    Detection Prevalence : 0.787         
##       Balanced Accuracy : 0.867         
##                                         
##        'Positive' Class : No            
## 
              # How much experience must be accumulated by a programmer with 4
              # training credits before his or her estimated probability of
              # completing the task exceeds 0.5?  List the qualifying rows in
              # increasing order of Experience; the first row is the threshold.
              sa_pred <- predict(sa_logit, newdata = sa, type = "response")
              sa$sa_pred <- sa_pred
              sa %>% filter(sa_pred >= 0.5 & Training == 4) %>% arrange(Experience)
## # A tibble: 7 x 4
##   Experience Training Completed sa_pred
##        <dbl>    <dbl> <fct>       <dbl>
## 1        9.2        4 Yes         0.527
## 2        9.2        4 No          0.527
## 3        9.9        4 Yes         0.710
## 4       10.9        4 Yes         0.883
## 5       11.4        4 Yes         0.930
## 6       12.2        4 No          0.970
## 7       12.4        4 Yes         0.976
              # A relevant predicted-probability-of-completed-task graph could
              # supplement the answer: probability vs. Experience, with
              # Training held fixed at 4.
              plot_model(sa_logit, type = "pred", terms = c("Experience", "Training[4]"))

              #=================================================================#