diff --git a/.github/workflows/ci-docs.yaml b/.github/workflows/ci-docs.yaml index 4b13d9ef9..461f3e2a3 100644 --- a/.github/workflows/ci-docs.yaml +++ b/.github/workflows/ci-docs.yaml @@ -14,6 +14,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - uses: actions/setup-python@v5 with: python-version: "3.10" diff --git a/.github/workflows/ci-tests.yaml b/.github/workflows/ci-tests.yaml index 1d643f9b3..0b742194e 100644 --- a/.github/workflows/ci-tests.yaml +++ b/.github/workflows/ci-tests.yaml @@ -20,6 +20,8 @@ jobs: steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python ${{ matrix.python-version }} uses: actions/setup-python@v5 with: @@ -44,6 +46,8 @@ jobs: runs-on: windows-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 with: @@ -59,6 +63,8 @@ jobs: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 with: @@ -83,6 +89,8 @@ jobs: # - uses: actions/checkout@v4 + with: + fetch-depth: 0 - name: Set up Python uses: actions/setup-python@v5 with: @@ -108,6 +116,8 @@ jobs: if: github.event_name == 'release' steps: - uses: actions/checkout@v2 + with: + fetch-depth: 0 - uses: actions/setup-python@v2 with: python-version: "3.10" diff --git a/docs/examples/index.qmd b/docs/examples/index.qmd index fe1a50fd2..48b8cc438 100644 --- a/docs/examples/index.qmd +++ b/docs/examples/index.qmd @@ -7,6 +7,7 @@ format: html: code-fold: true code-summary: "Show the Code" +toc: false --- :::::: {.column-page} @@ -419,3 +420,198 @@ coffee_table ::::: :::::: +## Ecosystem + +:::::: {.column-page} +::::: {.grid} + +:::{.g-col-lg-6 .g-col-12 .shrink-example} + +See the code (gt-extras site) ⬀ + + +```{python} +# | echo: false +import polars as pl +from great_tables import GT, html +import gt_extras as gte + +pre_tax_col = "gini_market__age_total" +post_tax_col = 
"gini_disposable__age_total" + +# Read the data +df = pl.read_csv( + "https://raw.githubusercontent.com/rfordatascience/tidytuesday/main/data/2025/2025-08-05/income_inequality_raw.csv", + schema={ + "Entity": pl.String, + "Code": pl.String, + "Year": pl.Int64, + post_tax_col: pl.Float64, + pre_tax_col: pl.Float64, + "population_historical": pl.Int64, + "owid_region": pl.String, + }, + null_values=["NA", ""], +) + +# Propagate the region field to all rows of that country +df = ( + df.sort("Entity") + .group_by("Entity", maintain_order=True) + .agg( + [ + pl.col("Code"), + pl.col("Year"), + pl.col(post_tax_col), + pl.col(pre_tax_col), + pl.col("population_historical"), + # Most important action happens here + pl.col("owid_region").fill_null(strategy="backward"), + ] + ) + .explode( + [ + "Code", + "Year", + post_tax_col, + pre_tax_col, + "population_historical", + "owid_region", + ] + ) +) + +# Drop rows where there is a null in either pre-tax or post-tax cols +df = df.drop_nulls( + subset=( + pl.col(post_tax_col), + pl.col(pre_tax_col), + ) +) + +# Compute the percent reduction in Gini coefficient. 
+df = df.with_columns( + ((pl.col(pre_tax_col) - pl.col(post_tax_col)) / pl.col(pre_tax_col) * 100) + .round(2) + .alias("gini_pct_change") +) + +# Calculate 5-year benchmark (mean) of percent change for each country +df = df.with_columns( + pl.col("gini_pct_change") + .rolling_mean(window_size=5) + .over(pl.col("Entity")) + .alias("gini_pct_benchmark_5yr") +) + +# Select rows with large population in the year 2020, sorted by coefficient post-tax +df = ( + # Choose a smaller pop to include more countries + df.filter(pl.col("population_historical").gt(40000000)) + .filter(pl.col("Year").eq(2020)) + .sort(by=pl.col(post_tax_col)) +) + + +# Scale population +df = df.with_columns((pl.col("population_historical").log10()).alias("pop_log")) +pop_min = df["pop_log"].min() / 1 +pop_max = df["pop_log"].max() + +# Set up gt-extras icons, scaling log-population to a 1-11 range +df = df.with_columns( + ((pl.col("pop_log") - pop_min) / (pop_max - pop_min) * 10 + 1) + .round(0) + .cast(pl.Int64) + .alias("pop_icons") +) + +# Format original population value with commas +df = df.with_columns( + pl.col("population_historical").map_elements( + lambda x: f"{int(x):,}" if x is not None else None, return_dtype=pl.String + ) +) + +# Apply gte.fa_icon_repeat to each entry in the pop_icons column +df_with_icons = df.with_columns( + pl.col("pop_icons").map_elements( + lambda x: gte.fa_icon_repeat(name="person", repeats=int(x)), + return_dtype=pl.String, + ) +) + +# Generate the table, before gt-extras add-ons +gt = ( + GT(df_with_icons, rowname_col="Entity", groupname_col="owid_region") + .tab_header( + "Income Inequality Before and After Taxes in 2020", + "As measured by the Gini coefficient, where 0 is best and 1 is worst", + ) + .cols_move("pop_icons", after=pre_tax_col) + .cols_align("left") + .cols_hide(["Year", "pop_log", "population_historical"]) + .fmt_flag("Code") + .cols_label( + { + "Code": "", + "gini_pct_change": "Improvement Post Taxes", + "pop_icons": "Population", + } + ) + 
.tab_source_note( + html( + """ +