Business Intelligence in the Cloud

IBM Watson Studio - Business Intelligence in the Cloud

IBM Watson Studio provides a useful platform for running Machine Learning and developing Business Intelligence. Cloud resources can be accessed virtually for free, and the lite version of the service may prove a fruitful exemplar of the tools now available to small businesses wanting to harness the cloud remotely. Rather than owning their own computing infrastructure or data centres, micro-entrepreneurs can rent access to a cloud service provider. This option may also save money but of course does not have the allure of being free. One advantage of using cloud computing services is that firms can avoid the upfront cost and responsibility of owning and maintaining their own IT infrastructure, and simply pay for what they use, as and when they use it.

Resources like this are increasingly being relied upon by small businesses to leverage economies of scale that can be summoned to your computer, tablet or phone. The following resources and tools are available for free to explore AI and machine learning functionality within Watson Studio:

50 capacity unit hours/month
Integrated environments
Publish and collaborate in the cloud
Notebook servers and R Studio for interactivity and data visualization with Python, R, and Scala

Small businesses are typically resource-stretched actors and so would benefit from being able to make use of data tools normally exclusive to the big players. Of course, free sometimes means your data is the commodity being sold on to third parties. Machine Learning and Artificial Intelligence are increasingly relied upon by business owners. Below, we explore a limited number of cloud resources available within IBM Watson Studio and further develop the exploratory data analysis of the HMDA dataset, replete with graphing, data query and pivot tabling. R Tidyverse code is provided underneath the video clips, so implementation is kept tractable and straightforward. The video clips do not provide an exhaustive, detailed account of IBM Watson Studio resources. They are merely intended to showcase a limited but nontrivial aspect of Cloud functionality that interested parties might explore further.

####################################################

# HMDA Boston tidyverse

# Exploratory Data Analysis

# Dataset Described in

# http://pubs.aeaweb.org/doi/pdfplus/10.1257/jep.28.2.3

# Hal R. Varian

###################################################

library(Ecdat)

library(tidyverse)

library(party)

# Load the Boston HMDA mortgage data set (object `Hdma`) into the workspace.
data("Hdma")

# Column 11 carries a misspelled name in the raw data; replace it with a
# short, correctly spelled one.
colnames(Hdma)[11] <- "condo"

# dir: debt payments to total income ratio;

# hir: housing expenses to income ratio;

# lvr: ratio of size of loan to assessed value of property;

# ccs: consumer credit score;

# mcs: mortgage credit score;

# pbcr: public bad credit record;

# dmi: denied mortgage insurance;

# self: self employed;

# single: applicant is single;

# uria: 1989 Massachusetts unemployment rate applicant's industry;

# condo (column 11, misspelled "condominiom" in the raw data and renamed above): condominium;

# black: race of applicant black;

# deny: mortgage application denied;

# First look at the data: leading rows, per-column summaries, structure,
# and finally the spreadsheet-style viewer.
Hdma %>% head()
Hdma %>% summary()
Hdma %>% str()

# NOTE(review): lowercase view() is presumably the tidyverse (tibble) wrapper;
# base R itself only provides View() - confirm it is available in your session.
view(Hdma)

# Bar chart of application outcomes. A "no" in `deny` means the mortgage
# was approved.
ggplot(data = Hdma) +
  geom_bar(mapping = aes(x = deny)) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial, no implies approved") +
  theme_bw()

# Number of applications at each consumer credit score (ccs; the lower the
# better). geom_bar() counts rows per x value, so the score is on the x axis
# and the count is on the y axis - the labels say so explicitly.
ggplot(Hdma, aes(x = ccs)) +
  theme_bw() +
  geom_bar() +
  labs(x = "consumer credit score (ccs)",
       y = "number of applications",
       title = "consumer credit score")

# Number of applications at each mortgage credit score (mcs; the lower the
# better). Same axis convention as the ccs chart above.
ggplot(Hdma, aes(x = mcs)) +
  theme_bw() +
  geom_bar() +
  labs(x = "mortgage credit score (mcs)",
       y = "number of applications",
       title = "mortgage credit score")

# Small multiples: the deny bar chart repeated in one panel per ccs value,
# drawn from the raw Hdma data.
ggplot(data = Hdma) +
  geom_bar(mapping = aes(x = deny)) +
  facet_wrap(~ ccs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying ccs") +
  theme_bw()

################################

# all

################################

# Keep only the rows of Hdma that have no missing value in any column.
# NOTE(review): `all` is also the name of a base R function; function calls
# such as all(x) still resolve to the function, but the name is easy to misread.
all <- Hdma[complete.cases(Hdma), , drop = FALSE]

# Small multiples (one panel per ccs value) of the deny counts, this time
# restricted to the complete cases held in `all`.
ggplot(data = all) +
  geom_bar(mapping = aes(x = deny)) +
  facet_wrap(~ ccs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying ccs") +
  theme_bw()

# Same panels, each bar split by public bad credit record (pbcr).
ggplot(data = all) +
  geom_bar(mapping = aes(x = deny, fill = pbcr)) +
  facet_wrap(~ ccs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying ccs and pbcr") +
  theme_bw()

# Same panels, each bar split by denied mortgage insurance (dmi).
ggplot(data = all) +
  geom_bar(mapping = aes(x = deny, fill = dmi)) +
  facet_wrap(~ ccs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying ccs and dmi") +
  theme_bw()

# Breakdown of employed versus self-employed applicants.
# geom_bar() counts rows per category, so the y axis is a count of
# applications; the employment status itself is on the x axis.
ggplot(all, aes(x = self)) +
  theme_bw() +
  geom_bar() +
  labs(x = "Self Employed",
       y = "number of applications",
       title = "Self Employed")

# Mortgage outcome by employment status: bars split by `self`,
# one panel per ccs value.
ggplot(data = all) +
  geom_bar(mapping = aes(x = deny, fill = self)) +
  facet_wrap(~ ccs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying ccs and self-employed") +
  theme_bw()

# Mortgage outcome split by denied mortgage insurance (dmi),
# one panel per mcs value.
ggplot(data = all) +
  geom_bar(mapping = aes(x = deny, fill = dmi)) +
  facet_wrap(~ mcs) +
  ylab("Mortgage Deny Count") +
  ggtitle("Mortgage denial for varying mcs and dmi") +
  theme_bw()

# Histogram of the loan-to-value ratio (lvr).
# The x-axis label quotes the bin width, so it must match the value passed
# to geom_histogram() (0.1).
ggplot(all, aes(x = lvr)) +
  theme_bw() +
  geom_histogram(binwidth = 0.1) +
  labs(y = "number of mortgage application in lvr band",
       x = "lvr (binwidth = 0.1)",
       title = "lvr Distribution")

# Same histogram with each bar split by outcome (`deny`), to explore how lvr
# relates to mortgage approval.
ggplot(all, aes(x = lvr, fill = deny)) +
  theme_bw() +
  geom_histogram(binwidth = 0.1) +
  labs(y = "number of mortgage application in lvr band",
       x = "lvr (binwidth = 0.1)",
       title = "lvr Distribution")

############################################################

# Logistic regression (binomial family) of the denial outcome on lvr alone,
# fitted on the complete cases in `all`.
logit.fitlvr <- glm(deny ~ lvr,data=all,family="binomial")

# Coefficient table and fit statistics for the lvr-only model.
summary(logit.fitlvr)

# Same model family, with every other column of `all` as a predictor
# (the `.` on the right-hand side of the formula).
logit.fit <- glm(deny ~ .,data=all,family="binomial")

# Coefficient table and fit statistics for the full model.
summary(logit.fit)

###########################################################

# Scatter plot of hir (housing expenses to income) against dir (debt payments
# to income) for every complete case.
ggplot(data = all) +
  geom_point(mapping = aes(x = dir, y = hir))

# Same scatter, coloured first by consumer credit score and then by outcome.
ggplot(all, aes(x = dir, y = hir, color = ccs)) +
  geom_point()

ggplot(all, aes(x = dir, y = hir, color = deny)) +
  geom_point()

# Repeat both coloured scatters keeping only rows where both ratios are
# below 1.
ggplot(filter(all, dir < 1, hir < 1), aes(x = dir, y = hir, color = ccs)) +
  geom_point()

ggplot(filter(all, dir < 1, hir < 1), aes(x = dir, y = hir, color = deny)) +
  geom_point()

# dir vs hir in two rows of panels, one panel per ccs value, coloured by ccs.
ggplot(all, aes(x = dir, y = hir, color = ccs)) +
  geom_point() +
  facet_wrap(~ ccs, nrow = 2)

# Same panels, coloured by mortgage outcome instead.
ggplot(all, aes(x = dir, y = hir, color = deny)) +
  geom_point() +
  facet_wrap(~ ccs, nrow = 2)

# Same again with a smoother per panel. Colour is mapped only in the point
# layer, so the smoother is fitted to all points in a panel rather than
# separately per outcome.
ggplot(all, aes(x = dir, y = hir)) +
  geom_point(aes(color = deny)) +
  geom_smooth() +
  facet_wrap(~ ccs, nrow = 2)

# dir vs hir for applications with dir < 1, coloured by outcome,
# one panel per ccs value.
ggplot(filter(all, dir < 1), aes(x = dir, y = hir)) +
  geom_point(aes(color = deny)) +
  facet_wrap(~ ccs, nrow = 2)
# A smoother over the same x/y mapping is left disabled here:
#   + geom_smooth(mapping = aes(x = dir, y = hir))

# Correlation between dir and hir over all complete cases (not the filtered
# subset plotted above).
with(all, cor(dir, hir))

# dir against lvr for applications with dir < 1: points coloured by outcome,
# one smoother per ccs panel fitted to all points in the panel.
ggplot(filter(all, dir < 1), aes(x = lvr, y = dir)) +
  geom_point(aes(color = deny)) +
  geom_smooth() +
  facet_wrap(~ ccs, nrow = 2)

# Same plot, additionally dropping rows with lvr of 1 or more.
ggplot(filter(all, dir < 1, lvr < 1), aes(x = lvr, y = dir)) +
  geom_point(aes(color = deny)) +
  geom_smooth() +
  facet_wrap(~ ccs, nrow = 2)

# Correlation between dir and lvr over all complete cases.
with(all, cor(dir, lvr))

# Distribution of dir for approved versus denied applications.
ggplot(all, aes(x = deny, y = dir)) +
  geom_boxplot()

# Same comparison, one panel per ccs value.
ggplot(all, aes(x = deny, y = dir)) +
  geom_boxplot() +
  facet_wrap(~ ccs, nrow = 2)

# Same panels after dropping rows with dir of 2 or more.
ggplot(filter(all, dir < 2), aes(x = deny, y = dir)) +
  geom_boxplot() +
  facet_wrap(~ ccs, nrow = 2)

# lvr by outcome, one panel per ccs value, rows with lvr of 2 or more dropped.
ggplot(filter(all, lvr < 2), aes(x = deny, y = lvr)) +
  geom_boxplot() +
  facet_wrap(~ ccs, nrow = 2)

# Same lvr boxplots, split by self-employment status.
ggplot(filter(all, lvr < 2), aes(x = deny, y = lvr, color = self)) +
  geom_boxplot() +
  facet_wrap(~ ccs, nrow = 2)

# lvr boxplots for African American applicants relative to the rest of the
# population (split on `black`).
ggplot(filter(all, lvr < 2), aes(x = deny, y = lvr, color = black)) +
  geom_boxplot() +
  facet_wrap(~ ccs, nrow = 2)

# The following pivot tables provide another tool for aggregating and
# summarising relationships in the data.

# Median lvr and number of applications per outcome.
pivot1 <- all %>%
  group_by(deny) %>%
  summarize(Medianlvr = median(lvr, na.rm = TRUE),
            count = n()) %>%
  arrange(deny)

View(pivot1)

# Median lvr and number of applications per outcome and mortgage-insurance
# denial status.
# summarize() only peels off the last grouping column, so with two grouping
# columns the result would stay grouped by `deny`; ungroup() hands back a
# plain table so later operations on pivot2 are not silently done per group.
pivot2 <- all %>%
  group_by(deny, dmi) %>%
  summarize(Medianlvr = median(lvr, na.rm = TRUE),
            count = n()) %>%
  ungroup() %>%
  arrange(deny, dmi)

View(pivot2)

# Fit a tree with ctree() (from the party package loaded above) predicting
# `deny` from every other column of `all`.
all.fit <- ctree(deny ~ ., data = all)

# Figure 5 in paper
# The tree is drawn on the current graphics device. To write it to a file
# instead, uncomment BOTH the pdf() line before plot() and the graphics.off()
# line after it. With pdf() disabled, an active graphics.off() would close the
# on-screen plot the moment it was drawn, so the two stay commented out
# together.
# pdf("all.pdf", height = 8, width = 16)
plot(all.fit)
# graphics.off()

# Mean dir and number of applications for each combination of outcome,
# consumer credit score and mortgage credit score.
# summarize() only peels off the last grouping column, so the result would
# stay grouped by `deny` and `ccs`; ungroup() hands back a plain table.
pivot3 <- all %>%
  group_by(deny, ccs, mcs) %>%
  summarize(meandir = mean(dir, na.rm = TRUE),
            count = n()) %>%
  ungroup() %>%
  arrange(deny, ccs, mcs)

View(pivot3)

Page updated

Google Sites

Report abuse