FreeGeoData: January 2015

Tuesday, January 13, 2015

Attached is a sample of creating customer journeys in R code; this example used a SQL Server database as the backend.

#==========================EM Clustering on the text===============================#

# Word frequency: row totals of `m`, sorted from largest to smallest.
# NOTE(review): presumably `m` is a term-by-document matrix built earlier,
# so each row total is a word's overall count -- confirm against its source.
wordFreq <- sort(rowSums(m), decreasing = TRUE)

# Count, for each element of `x`, how many of its tokens match `pattern`.
#
# Arguments:
#   x        character vector of records to scan.
#   pattern  regular expression handed to grep(); a token counts as a match
#            when the pattern is found anywhere inside it (substring match).
#   split    delimiter handed to strsplit() to break each record into tokens.
#
# Returns an integer vector with one count per element of `x`. vapply() keeps
# the result type-stable, so an empty `x` yields integer(0) rather than NULL.
strcount <- function(x, pattern, split) {
  vapply(
    strsplit(x, split),
    # length() never yields NA, so no NA filtering is needed on the count.
    function(tokens) length(grep(pattern, tokens)),
    integer(1)
  )
}

# Find the count of each frequent word in every record.
# Only words whose overall frequency exceeds 40 are kept.
string <- names(wordFreq[wordFreq > 40])

# Lower-case the record text once; it does not change between iterations.
records_lower <- tolower(MN1[, 1])

# Zero-filled data frame for storing the counts: one row per record,
# one column per retained word.
count.string <- data.frame(
  matrix(0, ncol = length(string), nrow = length(records_lower))
)

# seq_along() makes the loop a no-op when no word passes the threshold
# (1:length(string) would iterate over c(1, 0) in that case).
for (i in seq_along(string)) {
  count.string[, i] <- strcount(records_lower, string[i], " ")
}

colnames(count.string) <- string

# Combine the counts with the original data records.
MN2 <- cbind(MN1[, 1], count.string)

# Clustering: fit a 3-component model with Mclust on every column of MN2
# except the first (the first column holds the original records).
mc <- Mclust(MN2[, 2:ncol(MN2)], G = 3)

# Classification plot restricted to the 3rd and 4th clustered dimensions.
plot(mc, what = "classification", dimens = c(3, 4))

# Keep only the records assigned to cluster 2.
MN2.output <- MN2[mc$classification == 2, ]

FreeGeoData