%% Copyright 2021-2024 Tobias Enderle
%%
%% This work may be distributed and/or modified under the
%% conditions of the LaTeX Project Public License, either version 1.3c
%% of this license or (at your option) any later version.
%% The latest version of this license is in
%%   http://www.latex-project.org/lppl.txt
%% and version 1.3c or later is part of all distributions of LaTeX
%% version 2005/12/01 or later.

\documentclass{article}

% pgfplots draws the two plots in this document.
\usepackage{pgfplots}
\pgfplotsset{compat=1.15}
% pyluatex provides the python environment and the \py command used below.
\usepackage{pyluatex}
% booktabs: presumably required for the rules emitted by the table that is
% generated with to_latex(hrules=True) -- confirm against the pandas docs.
\usepackage{booktabs}

% The source URL of the data set is stored in \dataurl here and typeset later
% in a footnote.
\usepackage{url}
\urldef\dataurl\url{https://population.un.org/wpp/Download/Files/1_Indicators%20(Standard)/CSV_FILES/WPP2019_TotalPopulationBySex.csv}

\begin{python}
import pandas as pd
import datetime

def pgf_coords(x, y):
    """Format paired x and y values as a pgfplots coordinate list.

    Returns a string such as '(1,2) (3,4)'. Values are paired with zip, so
    pairing stops at the end of the shorter input.
    """
    points = []
    for x_val, y_val in zip(x, y):
        points.append(f'({x_val},{y_val})')
    return ' '.join(points)

# Load the population subset and index it by location and year so that a
# single location can be selected with data.loc[...].
file = 'population.csv'
df = pd.read_csv(file)
data = df.set_index(['Location', 'Time'])
data = data.sort_index()
world_data = data.loc['World']

# World population in the year in which the document is compiled.
year = datetime.datetime.now().year
current_pop = world_data.loc[year].PopTotal

# Largest change between consecutive rows of the world series. The result is
# divided by 1e3 because the document text reports it in millions.
diffs = world_data['PopTotal'].diff()
highest_increase = diffs.max() / 1e3
highest_increase_year = diffs.idxmax()
\end{python}

\title{PyLuaTeX Example -- Data Visualization}
\author{Tobias Enderle}

\begin{document}

\maketitle

This document demonstrates how data can be visualized flexibly in a \LaTeX{}
document with the help of the Python package \emph{pandas} and the \LaTeX{}
package \emph{pgfplots}.

We show some statistics of the United Nations' \emph{World Population Prospects}%
\footnote{United Nations, Department of Economic and Social Affairs, Population
Division (2019). World Population Prospects 2019, Online Edition. Rev.\ 1.} in the
form of plots, tables or individual values.
The data file \texttt{\py{file}} is a subset of the file
\texttt{WPP2019\_TotalPopulationBySex.csv}\footnote{\dataurl{} (last accessed:
2021/07/04)}.

% The year range is read from the sorted world series that is plotted below,
% not from the first and last rows of the raw CSV, whose row order is not
% guaranteed.
The following plot shows the estimated world population between
\py{world_data.index.min()} and \py{world_data.index.max()}.
The red lines mark the current year and population.

\begin{center}
% World population over time. PopTotal is divided by 1e6 so that the y axis
% matches its label (billions).
\begin{tikzpicture}
\begin{axis}[
    width=\textwidth,
    height=0.7\textwidth,
    enlarge x limits=false,
    ymin=0,
    ylabel={Estimated world population in billions},
    xlabel={Year},
    xticklabel style={/pgf/number format/1000 sep=},
    grid=both,
]
\addplot[no marks,black,thick] coordinates {
    \py{pgf_coords(world_data.index, world_data.PopTotal / 1e6)}
};
% Marker for the current year: up from the x axis to the curve, then left to
% the first plotted year. Since enlarge x limits is false, that year is the
% left edge of the axis, so the line no longer depends on axis clipping.
\draw[red,thick] (axis cs:\py{year},0)
    |- (axis cs:\py{world_data.index.min()},\py{current_pop / 1e6});
\end{axis}
\end{tikzpicture}
\end{center}

The largest year-over-year increase, \py{round(highest_increase)} million people,
occurred in \py{highest_increase_year}. The world population is expected to reach
\py{round(world_data.loc[2100].PopTotal / 1e6, 2)} billion by 2100.

The following table shows the total as well as the male and female population at
the beginning of each decade. The values are given in thousands.
\begin{center}
\begin{python}
# Source column -> header shown in the table (the first entry is the year).
headers = {
    'Time': 'Year',
    'PopTotal': 'Total (thousands)',
    'PopMale': 'Male (thousands)',
    'PopFemale': 'Female (thousands)',
}
labels = list(headers.values())

# Every tenth row of the world series, restricted to the table columns.
table_data = world_data.iloc[::10].reset_index()[list(headers)]
table_data.columns = labels

# Population columns get thousands separators and no decimals; the index
# column is hidden.
styler = table_data.style.hide(axis='index')
styler = styler.format(formatter={label: '{:,.0f}' for label in labels[1:]})
table = styler.to_latex(hrules=True)

# Printed output is inserted into the document at this position.
print(table)
\end{python}
\end{center}

The plot below shows the estimated population of China and the United States
of America (USA). The numbers are given in millions.
\begin{center}
\begin{python}
# Year at which each curve is labeled with its country name.
marker_year = 2030

# Plot label -> location name in the data set.
countries = {
    'China': 'China',
    'USA': 'United States of America',
}
# One (label, series) pair per country; PopTotal is divided by 1e3 so that the
# y axis matches its label (millions).
plots = [
    (label, data.loc[location].PopTotal / 1e3)
    for label, location in countries.items()
]
\end{python}
\begin{tikzpicture}
\begin{axis}[
    % size
    width=\textwidth,
    height=0.55\textwidth,
    % axis limits
    ymin=0,
    enlarge x limits=false,
    % labels and grid
    xlabel={Year},
    ylabel={Estimated population in millions},
    xticklabel style={/pgf/number format/1000 sep=},
    grid=both,
]
% One curve and one label per entry of the Python list plots.
\pgfplotsinvokeforeach{0,1}{
    \addplot[no marks,black,thick] coordinates {
        \py{pgf_coords(plots[#1][1].index, plots[#1][1])}
    };
    \node[below]
        at (axis cs:\py{marker_year},\py{plots[#1][1].loc[marker_year]})
        {\py{plots[#1][0]}};
}
\end{axis}
\end{tikzpicture}
\end{center}

\end{document}