An R Tutorial On Visualizing Population Pyramids
Abbad Bouchra, under the supervision of Dr. Farid FLICI
Summary
In this tutorial, we explored how to create population pyramids using R and ggplot2. Population pyramids are essential tools for visualizing the distribution of population across different age groups and genders. Here’s a breakdown of what we covered:
Introduction
Loading Libraries
Data Preparation
Creating Population Pyramids
Adjustments and Customization
Population Projection
By following this tutorial, you should now have a solid understanding of how to create and customize population pyramids in RMarkdown using ggplot2.
Introduction
In this tutorial, we will learn how to create population pyramids using the ggplot2 package in R Markdown. Population pyramids are graphical illustrations that show the distribution of a population by sex and age group.
Load Necessary Libraries
First, we need to load the necessary libraries for data manipulation and visualization.
# Set the CRAN mirror used by install.packages()
options(repos = c(CRAN = "https://cran.rstudio.com/"))

# Packages used by this tutorial, listed in the order they are attached.
# The order matters for masking: packages attached later take precedence
# (reshape2 after reshape, tidyverse last).
required_packages <- c(
  "readxl", "ggplot2", "reshape", "reshape2",
  "pyramid", "animation", "ggthemes", "tidyverse"
)

# Install whichever packages are not available yet. tidyverse is included
# here so that attaching it below cannot fail on a fresh installation.
for (pkg in required_packages) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

# Attach every package, silencing start-up messages and warnings
for (pkg in required_packages) {
  suppressWarnings(suppressPackageStartupMessages(
    library(pkg, character.only = TRUE)
  ))
}
Upload Population Data
Assume that you have an Excel file (here `projection.xlsx`) with three columns: `age`, `males`, and `females`.
# Path of the Excel workbook that holds the population data
projection_file <- "C:/Users/ABBAD TECHNOLOGY/Downloads/projection.xlsx"
# Import the workbook into `proj`
proj <- read_excel(path = projection_file)
# Show the columns, their types and the dimensions of `proj`
str(object = proj)
## tibble [100 × 3] (S3: tbl_df/tbl/data.frame)
## $ age : num [1:100] 0 1 2 3 4 5 6 7 8 9 ...
## $ males : num [1:100] 525931 505803 494789 479622 455602 ...
## $ females: num [1:100] 495945 474973 460463 437178 422942 ...
Prepare Data for Visualization
Now, we’ll prepare the data for creating population pyramids. We’ll select data for males and females separately.
# Keep only the age column and the male counts
males <- select(proj, age, males)
males
## # A tibble: 100 × 2
## age males
## <dbl> <dbl>
## 1 0 525931
## 2 1 505803
## 3 2 494789
## 4 3 479622
## 5 4 455602
## 6 5 440379
## 7 6 409459
## 8 7 375371
## 9 8 350141
## 10 9 330236
## # ℹ 90 more rows
# Keep only the age column and the female counts
females <- select(proj, age, females)
females
## # A tibble: 100 × 2
## age females
## <dbl> <dbl>
## 1 0 495945
## 2 1 474973
## 3 2 460463
## 4 3 437178
## 5 4 422942
## 6 5 400370
## 7 6 388023
## 8 7 376249
## 9 8 350256
## 10 9 330381
## # ℹ 90 more rows
Create Population Pyramid
Now, let’s create an initial population pyramid showing both males and females.
# Assemble a single data frame holding, for each age, the male and the
# female count side by side
combined <- data.frame(
  age = males[["age"]],
  males = males[["males"]],
  females = females[["females"]]
)
# Build the pyramid: male counts are drawn as positive bars and female
# counts as negative bars, so the two sexes extend in opposite directions.
p <- ggplot(combined, aes(x = age)) +
  geom_col(aes(y = males, fill = "Male"), position = "identity") +
  geom_col(aes(y = -females, fill = "Female"), position = "identity") +
  scale_fill_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  # Female values are negated only for layout; label the axis with magnitudes
  # so that no negative population counts are displayed.
  scale_y_continuous(labels = abs) +
  labs(x = "Age", y = "Population", title = "Population Pyramid") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("PopulationPyramid.png", width = 480, height = 300, units = "px")
print(p)
dev.off()

Population Pyramid by 5-Year Age Groups
To improve the visualization, let’s define age groups and categorize the ages accordingly.
# Define 5-year age groups.
# right = FALSE makes every interval closed on the left and open on the
# right ([0, 5), [5, 10), ...), so age 5 falls in "5-9" and age 85 in "85+".
# With cut()'s default (right = TRUE) each boundary age would be assigned to
# the group below it, e.g. age 5 would be labelled "0-4".
combined <- combined %>%
  mutate(
    age_group = cut(
      age,
      breaks = c(-Inf, seq(5, 85, by = 5), Inf),
      labels = c(
        "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39",
        "40-44", "45-49", "50-54", "55-59", "60-64", "65-69", "70-74",
        "75-79", "80-84", "85+"
      ),
      right = FALSE
    )
  )
Create population Pyramid
Now, let’s create the population pyramid using the defined age groups and population data for males and females.
# Sum the single-age counts within each age group. Plotting the unaggregated
# rows with position = "identity" would draw the bars of one group on top of
# each other, so each bar would show the largest single age, not the total.
age_group_totals <- combined %>%
  group_by(age_group) %>%
  summarise(males = sum(males), females = sum(females)) %>%
  ungroup()
# Plot the population pyramid with age groups: males as positive bars,
# females as negative bars
d <- ggplot(age_group_totals, aes(x = age_group)) +
  geom_col(aes(y = males, fill = "Male"), position = "identity") +
  geom_col(aes(y = -females, fill = "Female"), position = "identity") +
  scale_fill_manual(name = " ", values = c("Male" = "cyan", "Female" = "pink")) +
  # Female values are negated only for layout; label the axis with magnitudes
  scale_y_continuous(labels = abs) +
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("Plotthepopulationpyramidwithagegroups.png", width = 480, height = 300, units = "px")
print(d)
dev.off()

Reshape Data for Population Pyramid Plotting
Next, let’s reshape the `pyram` data frame using the `reshape2` package to prepare it for plotting the population pyramid.
# Age-group labels, youngest first; this order is also the factor level order
age_levels <- c(
  "0-4", "5-9", "10-14", "15-19", "20-24", "25-29", "30-34", "35-39", "40-44",
  "45-49", "50-54", "55-59", "60-64", "65-69", "70-74", "75-79", "80-84", "85+"
)
# Population totals per age group and sex.
# NOTE(review): these totals are hard-coded rather than derived from `proj`;
# verify them against the source data.
pyram <- data.frame(
  # Build age_group directly as a factor with the levels in the order above
  age_group = factor(age_levels, levels = age_levels),
  male_population = c(
    2902126, 1786497, 1480079, 1633986, 1823605, 1886390, 1741860, 1399704,
    1173186, 1009206, 897000, 784000, 678000, 567000, 467000, 378000, 298000,
    210000
  ),
  female_population = c(
    2728599, 1662222, 1388753, 1563372, 1759641, 1819350, 1679990, 1366123,
    1146892, 982000, 874000, 762000, 664000, 555000, 455000, 368000, 288000,
    200000
  )
)
# Reshape to long format (one row per age group and sex) for the pyramid plot
pyram_long <- melt(
  data = pyram,
  id.vars = "age_group",
  measure.vars = c("male_population", "female_population")
)
# Display the reshaped data
print(pyram_long)
## age_group variable value
## 1 0-4 male_population 2902126
## 2 5-9 male_population 1786497
## 3 10-14 male_population 1480079
## 4 15-19 male_population 1633986
## 5 20-24 male_population 1823605
## 6 25-29 male_population 1886390
## 7 30-34 male_population 1741860
## 8 35-39 male_population 1399704
## 9 40-44 male_population 1173186
## 10 45-49 male_population 1009206
## 11 50-54 male_population 897000
## 12 55-59 male_population 784000
## 13 60-64 male_population 678000
## 14 65-69 male_population 567000
## 15 70-74 male_population 467000
## 16 75-79 male_population 378000
## 17 80-84 male_population 298000
## 18 85+ male_population 210000
## 19 0-4 female_population 2728599
## 20 5-9 female_population 1662222
## 21 10-14 female_population 1388753
## 22 15-19 female_population 1563372
## 23 20-24 female_population 1759641
## 24 25-29 female_population 1819350
## 25 30-34 female_population 1679990
## 26 35-39 female_population 1366123
## 27 40-44 female_population 1146892
## 28 45-49 female_population 982000
## 29 50-54 female_population 874000
## 30 55-59 female_population 762000
## 31 60-64 female_population 664000
## 32 65-69 female_population 555000
## 33 70-74 female_population 455000
## 34 75-79 female_population 368000
## 35 80-84 female_population 288000
## 36 85+ female_population 200000
Plot Population Pyramid
Let’s plot the population pyramid using the reshaped data.
# Fill colour and legend label for each level of `variable`
sex_colours <- c("male_population" = "lightblue", "female_population" = "pink")
sex_labels <- c("male_population" = "Males", "female_population" = "Females")
# Draw the pyramid from the long data: female values are negated so they
# extend on the negative side, male values stay positive
a <- ggplot(
  pyram_long,
  aes(
    x = age_group,
    y = ifelse(variable == "female_population", -value, value),
    fill = variable
  )
) +
  geom_col(position = "identity") +
  scale_fill_manual(name = "Sex", values = sex_colours, labels = sex_labels) +
  # Label the axis with magnitudes rather than signed values
  scale_y_continuous(labels = abs) +
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("Plotthepopulationpyramidwithagegroupsreshapeddata.png", width = 480, height = 300, units = "px")
print(a)
dev.off()

More Options
Adjusted Stacked Population Pyramid
Let’s create a stacked population pyramid with adjusted y-axis breaks and labels.
# Adjust the y-axis breaks and labels for better visualization.
# The breaks are anchored on a rounded limit with a coarse step: starting the
# sequence at -max(value) with a step of 50000 would yield more than a hundred
# ticks at non-round positions.
axis_step <- 1000000
axis_limit <- ceiling(max(abs(pyram_long$value)) / axis_step) * axis_step
breaks <- seq(-axis_limit, axis_limit, by = axis_step)
# Show magnitudes with thousands separators and without scientific notation
labels <- format(abs(breaks), big.mark = ",", scientific = FALSE, trim = TRUE)
# Plot the adjusted stacked population pyramid: female values are negated so
# the two sexes extend in opposite directions
z <- ggplot(
  pyram_long,
  aes(
    x = age_group,
    y = ifelse(variable == "female_population", -value, value),
    fill = variable
  )
) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sex",
    values = c("male_population" = "lightblue", "female_population" = "pink"),
    labels = c("male_population" = "Males", "female_population" = "Females")
  ) +
  scale_y_continuous(breaks = breaks, labels = labels) +
  labs(x = "Age Group", y = "Population", title = "Adjusted Stacked Population Pyramid by Age Group") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("AdjustedStackedPopulationPyramidbyAgeGroup.png", width = 480, height = 300, units = "px")
print(z)
dev.off()

Population Pyramid with ggplot2
Let’s create a population pyramid using `ggplot2`.
# Install tidyverse if it is missing, then attach it. requireNamespace() only
# checks availability; library() errors if the package cannot be attached.
if (!requireNamespace("tidyverse", quietly = TRUE)) {
  install.packages("tidyverse")
}
library(tidyverse)
# Prepare the data: one row per age group and sex
pyram_long <- melt(
  pyram,
  id.vars = "age_group",
  measure.vars = c("male_population", "female_population")
)
# Make the male values negative so that, in this version, males are drawn on
# the negative side and females on the positive side
pyram_long$value <- ifelse(
  pyram_long$variable == "female_population",
  pyram_long$value,
  -pyram_long$value
)
# Define the y-axis breaks and labels for better visualization.
# The breaks are anchored on a rounded limit with a coarse step: starting the
# sequence at -max(|value|) with a step of 50000 would yield more than a
# hundred ticks at non-round positions.
axis_step <- 1000000
axis_limit <- ceiling(max(abs(pyram_long$value)) / axis_step) * axis_step
breaks <- seq(-axis_limit, axis_limit, by = axis_step)
# Show magnitudes with thousands separators and without scientific notation
labels <- format(abs(breaks), big.mark = ",", scientific = FALSE, trim = TRUE)
# Plot the population pyramid using ggplot2
r <- ggplot(pyram_long, aes(x = age_group, y = value, fill = variable)) +
  geom_col(position = "stack") +
  scale_fill_manual(
    name = "Sex",
    values = c("male_population" = "lightblue", "female_population" = "pink"),
    labels = c("male_population" = "Males", "female_population" = "Females")
  ) +
  scale_y_continuous(breaks = breaks, labels = labels) +
  labs(x = "Age Group", y = "Population", title = "Population Pyramid by Age Group") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("PopulationPyramidbyAgeGroup.png", width = 480, height = 300, units = "px")
print(r)
dev.off()

Population Projection
In this section, we will use the given dataset to visualize the projected population by age and sex.
# `proj` is the dataset loaded earlier, with one row per age and the columns
# `age`, `males` and `females`; it is visualized here as loaded.
# Reshape to long format: one row per age and sex. pivot_longer() replaces
# the superseded gather().
projection_long <- proj %>%
  pivot_longer(cols = -age, names_to = "sex", values_to = "population") %>%
  mutate(sex = recode(sex, "males" = "Male", "females" = "Female"))
# Plot the population projection: female values are negated so the two sexes
# extend in opposite directions
s <- ggplot(
  projection_long,
  aes(
    x = age,
    y = ifelse(sex == "Female", -population, population),
    fill = sex
  )
) +
  geom_col(position = "identity") +
  scale_fill_manual(name = "Sex", values = c("Male" = "lightblue", "Female" = "pink")) +
  # Label the axis with magnitudes rather than signed values
  scale_y_continuous(labels = abs) +
  labs(x = "Age", y = "Population", title = "Population Projection") +
  theme_minimal() +
  coord_flip()
# Save the plot as a PNG file
png("PopulationProjection.png", width = 480, height = 300, units = "px")
print(s)
dev.off()

This work has been conducted by Abbad Bouchra under the supervision of Flici Farid at CREAD (Centre de Recherche en Économie Appliquée et du Développement).
Last updated