## Data visualization with R: Histogram, Boxplot, Piechart, Mosaicplot, Correlation

**Download the dataset by clicking on the below link**

**Targeted Marketing Campaign data**

**Alternatively, this dataset is also available at:**

**We rename the data as dataset in R**

# Assign the imported data to a new data frame named "dataset" (NOTE(review): assumes predicting_response_to_telephone_calls is already loaded in the session - confirm)

dataset <- predicting_response_to_telephone_calls

**Data visualization**

**Histogram**

`# Quickest look at the data: a bare histogram of the age column`

`hist(dataset$age)`

`# Same plot again, now with an explicit title (main) and x-axis label (xlab)`

`hist(dataset$age, main = "Histogram for age", xlab = "age")`

# where, main is the title of the histogram

`## Let us color this histogram`

`# Blue bar borders, green fill, x-axis from 0 to 100, horizontal tick labels,`

`# and about 10 bins (hist() treats a single breaks number as a suggestion)`

`hist(dataset$age,`

`main = "Histogram for age",`

`xlab = "age",`

`border = "blue",`

`col = "green",`

`xlim = c(0, 100),`

`las = 1,`

`breaks = 10)`

# where xlab is the label of the x-axis,

# border = color of the border around the bars

# col = color used to fill the bars

# xlim = the minimum and maximum values shown on the x-axis

# las = a numeric value indicating the orientation of the tick mark labels

# and any other text added to a plot after its initialization.

# The options are as follows: always parallel to the axis (the default, 0),

# always horizontal (1), always perpendicular to the axis (2), and always vertical (3).

# breaks = a single number giving the number of cells for the histogram

`# Multiple colors`

`# The fill colors listed in col are applied to the bars in order`

`hist(dataset$age, main = "Histogram for age", xlab = "age", border = "blue",`

`col = c("red", "yellow", "green", "violet", "orange", "blue", "pink", "cyan"),`

`xlim = c(0, 100), las = 1, breaks = 5)`

**Plotting Histogram with mean and standard deviation value**

`# Simulate normally distributed values: n = sample size, mean = mean, sd = standard deviation`

`# (argument names are spelled out in full rather than relying on partial matching of "m")`

`score <- rnorm(n = 1309, mean = 29.8831, sd = 14.41)`

`hist(score)`

`## Let us color this histogram`

`hist(score, main = "Histogram for age",`

`xlab = "age",`

`border = "blue",`

`col = "green",`

`xlim = c(0, 100),`

`las = 1,`

`breaks = 10)`

**Boxplot**

`# A box plot shows the five-number summary - the minimum, the 25th percentile, the median,`

`# the 75th percentile and the maximum. It is thus useful for visualizing the spread of the data`

`# and deriving inferences accordingly.`

`# Univariate box plots of age: default look first, then filled in green`

`boxplot(dataset$age)`

`boxplot(dataset$age, col = "Green")`

`# Bivariate box plots use a formula: the variable left of ~ goes on the y axis,`

`# the grouping variable right of ~ goes on the x axis`

`boxplot(dataset$age ~ dataset$y) # age split by the levels of y`

`boxplot(dataset$age ~ dataset$y, col = c("Blue", "Green"))`

`# The same pair of plots for call duration`

`boxplot(dataset$duration, col = "red")`

`boxplot(dataset$duration ~ dataset$y, col = "red")`

**Piechart**

`# pie() draws one slice per category, so tabulate the column into counts first.`

`# Both charts use columns of "dataset" (the original first line referred to an`

`# object named train that is never created in this tutorial).`

`pie(table(dataset$y))`

`pie(table(dataset$housing))`

**Mosaic Plot**

`#A mosaic plot can be used for plotting categorical data very effectively with the area of the data`

`#showing the relative proportions.`

`# mosaicplot() expects a contingency table, so build one with table() first`

`mosaicplot(table(dataset$ed))`

**Visualizing correlation**

`## Visualizing correlation`

`# Install corrplot only when it is missing, so re-running the script does not reinstall it`

`if (!requireNamespace("corrplot", quietly = TRUE)) install.packages("corrplot")`

`library("corrplot")`

`# Select the numeric columns by name so the plots are labelled age / balance / campaign`

`dataset_subset <- dataset[, c("age", "balance", "campaign")]`

`# M is the correlation matrix that every corrplot() call below visualizes`

`M <- cor(dataset_subset)`

`## Visualization methods - one plot per method ("number" displays the correlation coefficient)`

`invisible(lapply(`

`c("circle", "square", "ellipse", "number", "shade", "color", "pie"),`

`function(shape) corrplot(M, method = shape)`

`))`

`## Layout`

`invisible(lapply(c("upper", "lower"), function(part) corrplot(M, type = part)))`

`corrplot(M, method = "number", type = "lower")`

`## order`

`invisible(lapply(`

`c("AOE", "hclust", "FPC", "alphabet"),`

`function(ordering) corrplot(M, order = ordering)`

`))`

`## Significance tests for the correlations in M.`

`## The tests must run on the same data that produced M, so that the p-value`

`## matrix lines up with M cell by cell.`

`res1 <- cor.mtest(dataset_subset, conf.level = .95)`

`res2 <- cor.mtest(dataset_subset, conf.level = .99)`

`## Mark the coefficients that are not significant at the given significance level`

`corrplot(M, p.mat = res1$p, sig.level = .2)`

`corrplot(M, p.mat = res1$p, sig.level = .05)`

`corrplot(M, p.mat = res1$p, sig.level = .01)`

`## Leave the cell blank where the coefficient is not significant`

`corrplot(M, p.mat = res1$p, insig = "blank")`