An R Tutorial On Visualizing Population Pyramids -Part 2-
Abbad Bouchra, under the supervision of Dr. Farid FLICI
Load Necessary Libraries
First, we need to load the necessary libraries for data manipulation and visualization.
# Set the CRAN mirror used by install.packages()
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Packages attached by this tutorial, in the order they are attached.
# tidyverse is part of the list because it is attached below as well; without
# an install check for it, library(tidyverse) fails on a fresh machine.
required_packages <- c(
  "readxl", "ggplot2", "reshape", "reshape2",
  "pyramid", "animation", "ggthemes", "tidyverse"
)

# Install only the packages that are not available yet
is_installed <- vapply(
  required_packages,
  requireNamespace,
  logical(1),
  quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Attach every package, hiding start-up messages and warnings
for (pkg in required_packages) {
  suppressWarnings(suppressPackageStartupMessages(
    library(pkg, character.only = TRUE)
  ))
}
Upload Population Data
Assume that you have an Excel file.
# Location of the Excel workbook; edit this path to point at your own copy
projection_path <- "C:/Users/ABBAD TECHNOLOGY/Downloads/projection.xlsx"
if (!file.exists(projection_path)) {
  stop("Population file not found: ", projection_path, call. = FALSE)
}

# Load the dataset
proj <- read_excel(projection_path)

# The rest of the tutorial reads these three columns, so fail early if the
# workbook does not provide them
stopifnot(all(c("age", "males", "females") %in% names(proj)))

# View the structure of the dataset
str(proj)
## tibble [100 × 3] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:100] 0 1 2 3 4 5 6 7 8 9 ...
## $ males : num [1:100] 525931 505803 494789 479622 455602 ...
## $ females: num [1:100] 495945 474973 460463 437178 422942 ...
Prepare Data for Visualization
Now, we’ll prepare the data for creating population pyramids. We’ll select data for males and females separately.
# Keep only the age and male-population columns, then display the result
males <- select(proj, age, males)
print(males)
## # A tibble: 100 × 2
## age males
## <dbl> <dbl>
## 1 0 525931
## 2 1 505803
## 3 2 494789
## 4 3 479622
## 5 4 455602
## 6 5 440379
## 7 6 409459
## 8 7 375371
## 9 8 350141
## 10 9 330236
## # ℹ 90 more rows
# Keep only the age and female-population columns, then display the result
females <- select(proj, age, females)
print(females)
## # A tibble: 100 × 2
## age females
## <dbl> <dbl>
## 1 0 495945
## 2 1 474973
## 3 2 460463
## 4 3 437178
## 5 4 422942
## 6 5 400370
## 7 6 388023
## 8 7 376249
## 9 8 350256
## 10 9 330381
## # ℹ 90 more rows
Create Population Pyramid:
This section demonstrates how to create an initial population pyramid displaying both male and female populations.
# Put the ages and both population columns side by side in one plain
# data frame, which is the input of every plot below
combined <- data.frame(
  age     = males[["age"]],
  males   = males[["males"]],
  females = females[["females"]]
)
# Create the population pyramid plot.
# Females are plotted as negative values so that, once the coordinates are
# flipped, their bars extend to the left of zero. The axis labels are passed
# through abs() so the female side does not display negative population counts.
p <- ggplot(combined, aes(x = age)) +
  geom_col(aes(y = males, fill = "Male"), position = "identity") +
  geom_col(aes(y = -females, fill = "Female"), position = "identity") +
  scale_fill_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  scale_y_continuous(labels = abs) +
  labs(x = "Age", y = "Population", title = "Population Pyramid") +
  theme_minimal() +
  coord_flip()

# Save the plot as a PNG file
png("PopulationPyramid.png", width = 480, height = 300, units = "px")
print(p)
dev.off()

Adjust the Title and Sizes:
Enhance the appearance of the population pyramid by adjusting the title’s position and modifying text sizes.
# Enhanced population pyramid plot: same bars as the basic pyramid, with a
# centred bold title, larger text and the legend moved to the top.
# Females are plotted as negative values (left side after coord_flip()); the
# axis labels go through abs() so no negative population counts are shown.
b <- ggplot(combined, aes(x = age)) +
  geom_col(aes(y = males, fill = "Male"), position = "identity") +
  geom_col(aes(y = -females, fill = "Female"), position = "identity") +
  scale_fill_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  scale_y_continuous(labels = abs) +
  labs(x = "Age", y = "Population", title = "Population Pyramid") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.position = "top",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  ) +
  coord_flip()

# Save the plot as a PNG file
png("EnhancedPopulationPyramid.png", width = 480, height = 300, units = "px")
print(b)
dev.off()

Population Pyramid Using Lines:
Create a population pyramid using lines instead of bars for a smoother representation.
# Enhanced population pyramid plot using lines instead of bars.
# Females are plotted as negative values (left side after coord_flip()) and
# the axis labels go through abs() so they read as positive counts.
# Line thickness is set with `linewidth`: using `size` for lines is deprecated
# since ggplot2 3.4.0.
# NOTE(review): the object name `c` shadows base::c as a variable; it is kept
# so that any existing reference to this plot still works.
c <- ggplot(combined, aes(x = age)) +
  geom_line(aes(y = males, color = "Male"), linewidth = 1.2) +
  geom_line(aes(y = -females, color = "Female"), linewidth = 1.2) +
  scale_color_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  labs(x = "Age", y = "Population", title = "Population Pyramid") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text = element_text(size = 12),
    legend.position = "top",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12)
  ) +
  scale_y_continuous(labels = abs) +
  coord_flip()

# Save the plot as a PNG file
png("EnhancedPopulationPyramidLines.png", width = 480, height = 300, units = "px")
print(c)
dev.off()

Explanation of Adjustments:
Line Plot : Replaced bars with lines for a smoother representation of the population distribution.
Color Customization : Used cyan for males and pink for females, making the two sides of the plot visually distinct.
Theme Adjustments : Enhanced the theme with larger and bold titles, adjusted axis labels and text sizes, and repositioned the legend to the top.
Y-axis Labels : Applied absolute values to y-axis labels for better readability.
Flip Coordinates : Retained coord_flip() for the horizontal layout of the pyramid.
Population Pyramid By Age Groups:
Improve the visualization by defining age groups and categorizing the ages accordingly.
# Define 5-year age groups.
# right = FALSE makes every interval closed on the left and open on the right
# ([0, 5), [5, 10), ...), so age 5 is labelled "5-9" and age 85 is labelled
# "85+". With cut()'s default (right = TRUE) each boundary age (5, 10, ..., 85)
# would fall into the younger group, contradicting the labels.
age_breaks <- c(-Inf, seq(5, 85, by = 5), Inf)
age_labels <- c(
  "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44",
  "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+"
)
combined <- combined %>%
  mutate(
    age_group = cut(age, breaks = age_breaks, labels = age_labels, right = FALSE)
  )
Create Population Pyramid:
Create the population pyramid using the defined age groups and population data for males and females.
# Total population per age group.
# combined holds one row per single year of age, so the counts are summed
# within each group before plotting. Plotting the raw rows with
# position = "identity" draws the bars of a group on top of each other, which
# shows the largest single-age value instead of the group total.
age_group_totals <- aggregate(
  cbind(males, females) ~ age_group,
  data = combined,
  FUN = sum
)

# Plot the population pyramid with age groups.
# Females are plotted as negative values (left side after coord_flip()); the
# axis labels go through abs() so no negative population counts are shown.
d <- ggplot(age_group_totals, aes(x = age_group)) +
  geom_col(aes(y = males, fill = "Male"), position = "identity") +
  geom_col(aes(y = -females, fill = "Female"), position = "identity") +
  scale_fill_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  scale_y_continuous(labels = abs) +
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  theme_minimal() +
  coord_flip()

# Save the plot as a PNG file
png("Plotthepopulationpyramidwithagegroups.png", width = 480, height = 300, units = "px")
print(d)
dev.off()

Enhanced Population Pyramid Plot Using Lines:
Create an enhanced population pyramid plot using lines for a clearer representation.
# Total population per age group.
# combined holds one row per single year of age; without this aggregation each
# age group has several y values and the line zigzags between them instead of
# passing through one total per group.
age_group_totals <- aggregate(
  cbind(males, females) ~ age_group,
  data = combined,
  FUN = sum
)

# Enhanced population pyramid plot using lines.
# group = 1 joins all age groups into a single line (the x variable is a
# factor). Line thickness uses `linewidth`, which replaces the deprecated
# `size` for lines since ggplot2 3.4.0.
e <- ggplot(age_group_totals, aes(x = age_group)) +
  geom_line(aes(y = males, color = "Male"), linewidth = 1.2, group = 1) +
  geom_line(aes(y = -females, color = "Female"), linewidth = 1.2, group = 1) +
  scale_color_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1), # Rotate x-axis text for better readability
    axis.text.y = element_text(size = 12),
    legend.position = "top",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey80", linewidth = 0.5), # Add major gridlines
    panel.grid.minor = element_line(color = "grey90", linewidth = 0.25) # Add minor gridlines
  ) +
  # Females are plotted as negative values; show them as positive counts
  scale_y_continuous(labels = abs) +
  coord_flip()

# Save the plot as a PNG file
png("Enhancedpopulationpyramidplotusinglines.png", width = 480, height = 300, units = "px")
print(e)
dev.off()

Combined Population Pyramid Plot:
Incorporate elements from both bar and line plots to create a comprehensive and visually appealing population pyramid.
# Total population per age group.
# combined holds one row per single year of age; the counts are summed within
# each group so that every bar and every line point represents the group
# total rather than overlapping single-age values.
age_group_totals <- aggregate(
  cbind(males, females) ~ age_group,
  data = combined,
  FUN = sum
)

# Combined population pyramid plot using bars and lines
f <- ggplot(age_group_totals, aes(x = age_group)) +
  # Semi-transparent bars for males and females (females as negative values)
  geom_col(aes(y = males, fill = "Male"), position = "identity", alpha = 0.4) +
  geom_col(aes(y = -females, fill = "Female"), position = "identity", alpha = 0.4) +
  # Lines for males and females; `linewidth` replaces the deprecated `size`
  # for lines since ggplot2 3.4.0, and group = 1 joins the factor levels
  geom_line(aes(y = males, color = "Male"), linewidth = 1.2, group = 1) +
  geom_line(aes(y = -females, color = "Female"), linewidth = 1.2, group = 1) +
  # Customizing fill and color for males and females
  scale_fill_manual(name = " ", values = c("Male" = "lightblue", "Female" = "pink")) +
  scale_color_manual(name = " ", values = c("Male" = "cyan", "Female" = "red")) +
  # Labels and title
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  # Minimal theme and customizations
  theme_minimal() +
  theme(
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1), # Rotate x-axis text for better readability
    axis.text.y = element_text(size = 12),
    legend.position = "top",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey80", linewidth = 0.5), # Add major gridlines
    panel.grid.minor = element_line(color = "grey90", linewidth = 0.25) # Add minor gridlines
  ) +
  # Show the (negative) female values as positive counts, then flip the axes
  scale_y_continuous(labels = abs) +
  coord_flip()

# Save the plot as a PNG file
png("Combinedpopulationpyramidplotusingbarsandlines.png", width = 480, height = 300, units = "px")
print(f)
dev.off()

Explanation of Combined Elements:
Bar Plots: Added bar plots for males and females with light colors and semi-transparency (alpha = 0.4) for a subtle background representation of the population.
Line Plots: Added line plots for males and females with distinct colors for a clear and precise representation of the population trends.
Custom Colors: Used scale_fill_manual and scale_color_manual to differentiate visually between the male and female populations.
Labels and Titles: Set appropriate labels and titles for clarity.
Theme Customizations: Included theme settings for better readability and visual appeal, including text rotation and gridlines.
Axis and Coordinates: Adjusted y-axis labels to show absolute values and flipped coordinates for a horizontal pyramid representation.
The Modified Combined Plot Code with a Custom Background Color
# Total population per age group.
# combined holds one row per single year of age; the counts are summed within
# each group so that every bar and every line point represents the group
# total rather than overlapping single-age values.
age_group_totals <- aggregate(
  cbind(males, females) ~ age_group,
  data = combined,
  FUN = sum
)

# Combined population pyramid plot using bars and lines with a custom background
g <- ggplot(age_group_totals, aes(x = age_group)) +
  # Semi-transparent bars for males and females (females as negative values)
  geom_col(aes(y = males, fill = "Male"), position = "identity", alpha = 0.4) +
  geom_col(aes(y = -females, fill = "Female"), position = "identity", alpha = 0.4) +
  # Lines for males and females; `linewidth` replaces the deprecated `size`
  # for lines since ggplot2 3.4.0, and group = 1 joins the factor levels
  geom_line(aes(y = males, color = "Male"), linewidth = 1.2, group = 1) +
  geom_line(aes(y = -females, color = "Female"), linewidth = 1.2, group = 1) +
  # Customizing fill and color for males and females
  scale_fill_manual(name = " ", values = c("Male" = "lightblue", "Female" = "pink")) +
  scale_color_manual(name = " ", values = c("Male" = "cyan", "Female" = "red")) +
  # Labels and title
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  # Custom theme with modified background
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "lightgrey", color = NA), # Change background color
    panel.background = element_rect(fill = "white", color = NA), # Change panel background
    plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14),
    axis.text.x = element_text(size = 12, angle = 45, hjust = 1), # Rotate x-axis text for better readability
    axis.text.y = element_text(size = 12),
    legend.position = "top",
    legend.title = element_text(size = 14),
    legend.text = element_text(size = 12),
    panel.grid.major = element_line(color = "grey", linewidth = 0.5), # Add major gridlines
    panel.grid.minor = element_line(color = "grey", linewidth = 0.25) # Add minor gridlines
  ) +
  # Customizing y-axis labels to be absolute values and flipping coordinates
  scale_y_continuous(labels = abs) +
  coord_flip()

# Save the plot as a PNG file
png("TheModifiedCombinedPlotCodewithaCustomBackgroundColor.png", width = 480, height = 300, units = "px")
print(g)
dev.off()

This work has been conducted by Abbad Bouchra under the supervision of Flici Farid at CREAD (Centre de Recherche en Économie Appliquée et du Développement).
Last updated