Data visualization with R: Histogram, Boxplot, Pie chart, Mosaic plot, Correlation

Download the dataset by clicking the link below.

Targeted Marketing Campaign data

Alternatively this dataset is also available at 

We assign the imported data to a data frame named "dataset" in R.

# Assign the imported data to a new data frame named "dataset"; every plot
# below reads from this object.
# NOTE(review): predicting_response_to_telephone_calls is not created in this
# script; presumably it was imported into the session beforehand - confirm.
dataset <- predicting_response_to_telephone_calls

Data visualization

Histogram

# A bare hist() call is the quickest way to look at how a variable is
# distributed.
hist(dataset$age)

# Same histogram, now labelled: `main` sets the plot title and `xlab` the
# x-axis label.
hist(
  dataset$age,
  xlab = "age",
  main = "Histogram for age"
)


## Colored histogram
# Argument reference:
#   main   - title of the histogram
#   xlab   - label of the x axis
#   border - color of the bar outlines
#   col    - fill color of the bars
#   xlim   - minimum and maximum value shown on the x axis
#   las    - orientation of the tick mark labels (and of other text added
#            to the plot afterwards): 0 = always parallel to the axis (the
#            default), 1 = always horizontal, 2 = always perpendicular to
#            the axis, 3 = always vertical
#   breaks - a single number giving the number of cells for the histogram
#            (hist() treats a single number as a suggestion)
hist(
  dataset$age,
  breaks = 10,
  xlim = c(0, 100),
  col = "green",
  border = "blue",
  las = 1,
  xlab = "age",
  main = "Histogram for age"
)

# Several fill colors: passing a vector to `col` colors the bars one after
# another in the order given.
hist(
  dataset$age,
  breaks = 5,
  xlim = c(0, 100),
  col = c("red", "yellow", "green", "violet", "orange", "blue", "pink", "cyan"),
  border = "blue",
  las = 1,
  xlab = "age",
  main = "Histogram for age"
)

Plotting a histogram from a given mean and standard deviation

# Simulate normally distributed values and plot their histogram.
#   n    - sample size
#   mean - mean of the distribution
#   sd   - standard deviation of the distribution
# `mean` is written out in full so the call does not depend on partial
# argument matching. The draw is random, so the plots differ between runs
# unless a seed is fixed with set.seed() first.
score <- rnorm(n = 1309, mean = 29.8831, sd = 14.41)
hist(score)

## Colored version of the same histogram
hist(
  score,
  main = "Histogram for age",
  xlab = "age",
  border = "blue",
  col = "green",
  xlim = c(0, 100),
  las = 1,
  breaks = 10
)

Boxplot

# A box plot summarizes a variable with five numbers: the minimum, the 25th
# percentile, the median, the 75th percentile and the maximum. That makes it
# a compact way to see the spread of the data and draw inferences from it.
# (By default boxplot() draws points far from the box individually rather
# than extending the whiskers to them.)

# Plain box plot of age, then the same plot with a fill color.
boxplot(dataset$age)
boxplot(
  dataset$age,
  col = "Green"
)

# Bivariate box plots take a formula: the variable to the left of ~ goes on
# the y axis, the grouping variable to the right of ~ goes on the x axis.

# age split by dataset$y, first plain, then with one fill color per group.
boxplot(dataset$age ~ dataset$y)
boxplot(
  dataset$age ~ dataset$y,
  col = c("Blue", "Green")
)

# duration on its own, then split by dataset$y.
boxplot(dataset$duration, col = "red")
boxplot(
  dataset$duration ~ dataset$y,
  col = "red"
)

Piechart

# Pie charts of category counts: table() tallies each level of the column
# and pie() draws one slice per level.
# Both charts read from `dataset`, the only data frame this script defines.
pie(table(dataset$y))

pie(table(dataset$housing))

Mosaic Plot

# A mosaic plot is an effective way to plot categorical data: the area of
# each tile shows the relative proportion of that category.
# mosaicplot() expects a contingency table, so the column is tabulated with
# table() before plotting.
# NOTE(review): `$ed` relies on partial name matching (presumably for a
# column such as `education`) - confirm the full column name and spell it out.
mosaicplot(table(dataset$ed))

Visualizing correlation

## Visualizing correlation

# Install corrplot only when it is missing, so re-running the script does not
# reinstall the package (and need network access) every time.
if (!requireNamespace("corrplot", quietly = TRUE)) {
  install.packages("corrplot")
}
library("corrplot")

# Columns to correlate. Naming them explicitly gives the plots short,
# readable labels instead of the auto-generated "dataset.age" style names.
dataset_subset <- data.frame(
  age = dataset$age,
  balance = dataset$balance,
  campaign = dataset$campaign
)

# Pairwise correlation matrix of the selected columns.
M <- cor(dataset_subset)

# Draw the correlation matrix once per glyph style, in this order.
# "number" displays the correlation coefficient itself.
glyph_styles <- c(
  "circle", "square", "ellipse", "number", "shade", "color", "pie"
)
for (glyph in glyph_styles) {
  corrplot(M, method = glyph)
}

## Layout
# `type` selects which part of the matrix is drawn: the upper or the lower
# triangle.
corrplot(corr = M, type = "upper")
corrplot(corr = M, type = "lower")

# Lower triangle again, this time showing the coefficients as numbers.
corrplot(corr = M, type = "lower", method = "number")

## Order
# `order` controls how the variables are arranged in the plot:
#   "AOE"      - angular order of the eigenvectors
#   "hclust"   - hierarchical clustering order
#   "FPC"      - first principal component order
#   "alphabet" - alphabetical order
for (ordering in c("AOE", "hclust", "FPC", "alphabet")) {
  corrplot(M, order = ordering)
}

## Significance tests
# cor.mtest() has to run on the same data that produced M, so that its
# p-value matrix (res$p) lines up with M cell for cell.
res1 <- cor.mtest(dataset_subset, conf.level = .95)
# NOTE(review): res2 is not used by any plot in this section - confirm
# whether it is still needed.
res2 <- cor.mtest(dataset_subset, conf.level = .99)

## Mark the coefficients that are not significant at the given level
corrplot(M, p.mat = res1$p, sig.level = .2)
corrplot(M, p.mat = res1$p, sig.level = .05)
corrplot(M, p.mat = res1$p, sig.level = .01)

## Leave the cells of non-significant coefficients blank
corrplot(M, p.mat = res1$p, insig = "blank")

1 Comment

  1. Took me time to read all the comments, but I really enjoyed the article. It proved to be Very helpful to me and I am sure to all the commenters here! It’s always nice when you can not only be informed, but also entertained!

Leave a Reply

Your email address will not be published. Required fields are marked *