The code for supporting market segment business case calculations in Chapter 5, Data Mining with Cluster Analysis, is as follows:
# ---- Market segment business case: customer-to-kiosk distances ----
# Relies on three objects created earlier in the chapter's code (they are not
# defined here):
#   clus  - data frame; columns 1-4 are used below as latitude, longitude,
#           two-cluster id and three-cluster id, in that order.
#   two, three - objects whose `$centers` matrix is read as one row per
#           cluster with (latitude, longitude) columns -- presumably kmeans()
#           fits; confirm against the preceding code.

# ---- Distance functions (Spherical Law of Cosines) ----

# Great-circle distance in km between points and centers, all given in
# decimal degrees. Vectorized over every argument.
spherical_distance_km <- function(lat, long, center_lat, center_long) {
  earth_radius_km <- 6371
  to_radians <- function(degrees) degrees * pi / 180

  cos_angle <- sin(to_radians(lat)) * sin(to_radians(center_lat)) +
    cos(to_radians(lat)) * cos(to_radians(center_lat)) *
    cos(to_radians(center_long) - to_radians(long))

  # Rounding can push the cosine marginally outside [-1, 1] when a point
  # coincides with its center; acos() would then return NaN, so clamp first.
  acos(pmin(pmax(cos_angle, -1), 1)) * earth_radius_km
}

# Distance (km) from (lat, long) to center `clus_id` of the two-cluster model.
distance2 <- function(lat, long, clus_id) {
  spherical_distance_km(
    lat, long,
    unname(two$centers[clus_id, 1]),
    unname(two$centers[clus_id, 2])
  )
}

# Distance (km) from (lat, long) to center `clus_id` of the three-cluster
# model.
distance3 <- function(lat, long, clus_id) {
  spherical_distance_km(
    lat, long,
    unname(three$centers[clus_id, 1]),
    unname(three$centers[clus_id, 2])
  )
}

# ---- Build the comparison data frame ----

# Whole columns are computed at once instead of looping row by row; this is
# also safe when `clus` has zero rows.
compare <- cbind(
  clus,
  dist2 = distance2(clus[[1]], clus[[2]], clus[[3]]),
  dist3 = distance3(clus[[1]], clus[[2]], clus[[4]])
)

# requireNamespace() only checks availability; library() then attaches dplyr
# quietly for this and any later code.
if (!requireNamespace("dplyr", quietly = TRUE)) {
  install.packages("dplyr")
}
suppressMessages(suppressWarnings(library(dplyr)))

# Hybrid solution: keep each row's two-cluster id (column 3) but measure to
# the three-cluster centers, so ids 1 and 2 pick rows 1 and 2 of
# three$centers.
compare$hybrid <- distance3(compare[[1]], compare[[2]], compare[[3]])
compare <- dplyr::mutate(compare, temp_increase = hybrid - dist3)

# ---- Distance histograms, one panel per scenario ----

par(mfrow = c(1, 3))
panel_titles <- c(
  dist2 = "Two Kiosks",
  dist3 = "Three Kiosks",
  hybrid = "Two (Hybrid Solution)"
)
for (column in names(panel_titles)) {
  hist(compare[[column]], ylim = c(0, 80), xlim = c(0, 8),
       col = "lightgray", xlab = "Dist (km)", main = panel_titles[[column]])
  abline(v = mean(compare[[column]]), lty = "dashed")  # mean distance
}
par(mfrow = c(1, 1))

summary(compare)  # to indicate max

# ---- Distance increase of the hybrid solution ----

hist(compare$temp_increase, breaks = 4, xlab = "Distance (km)",
     main = "Distance Increase: Building Two Kiosks at Future Locations",
     col = "gray")

# Count observations per 1 km interval and print each count over its bar.
bins <- as.data.frame(table(cut(compare$temp_increase, breaks = -1:5)))
text(seq(-0.5, 4.5, 1), 50, labels = bins[, 2], cex = 1.1, col = "black")

# Keep only the rows whose distance increases, as a plain numeric vector
# (selected by name rather than by dropping the first seven columns).
increase <- dplyr::filter(compare, temp_increase > 0)
increase <- increase$temp_increase
summary(increase)

km_to_miles <- 0.62137119
text(2, 80, paste("Average increase:",
                  round(mean(increase) * km_to_miles, 2), "miles"))
Note
You can also find this code, integrated with the rest of the code for Chapter 5, Data Mining with Cluster Analysis, on the book's website: http://jgendron.github.io/com.packtpub.intro.r.bi/