4  Disposition of Participants Table

This article demonstrates how to create a disposition table following ICH E3 guidance using rtflite, based on the R4CSR example.

4.1 Overview

The disposition table summarizes the flow of participants through the study, including:

- Number of participants randomized
- Number who discontinued and reasons for discontinuation
- Number who completed the study

4.2 Setup

import polars as pl
import rtflite as rtf
from importlib.resources import files

# Locate the adsl.parquet resource inside the rtflite.data package and load it
# into a polars DataFrame.
data_path = files("rtflite.data").joinpath("adsl.parquet")
adsl = pl.read_parquet(data_path)

# Treatment labels in display order; this order drives the column layout when
# table rows and column headers are built from this list.
# NOTE(review): presumably these match the values of adsl["TRT01P"] exactly - confirm.
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]

4.3 Calculate Disposition Statistics

# Row count per TRT01P group, sorted by group; used as the percentage
# denominator ("n" becomes "n_total" after joining with suffix "_total").
n_rand = adsl.group_by("TRT01P").agg(pl.len().alias("n")).sort("TRT01P")

def calc_stats(df, filter_expr=None):
    """Count rows per TRT01P group and express each count as a percent of n_rand.

    Args:
        df: polars DataFrame containing a "TRT01P" column.
        filter_expr: optional polars expression; when given, only rows
            matching it are counted.

    Returns:
        A DataFrame sorted by "TRT01P" with columns "TRT01P", "n" (count),
        "n_total" (the group's count from n_rand) and "pct" (100 * n / n_total,
        rounded to one decimal). Groups with no rows left after filtering do
        not appear in the result.
    """
    subset = df if filter_expr is None else df.filter(filter_expr)

    per_group = subset.group_by("TRT01P").agg(pl.len().alias("n"))

    # n_rand also has a column named "n"; the suffix renames it to "n_total".
    with_totals = per_group.join(n_rand, on="TRT01P", suffix="_total")

    pct_expr = (100 * pl.col("n") / pl.col("n_total")).round(1)
    return with_totals.with_columns(pct=pct_expr).sort("TRT01P")

# Per-treatment counts/percentages split by whether DCREASCD equals "Completed".
stats = dict(
    completed=calc_stats(adsl, pl.col("DCREASCD").eq("Completed")),
    discontinued=calc_stats(adsl, pl.col("DCREASCD").ne("Completed")),
)

# Counts and percentages for every (treatment, DCREASCD) pair among rows whose
# DCREASCD is not "Completed"; the denominator is the treatment's n_rand count.
disc_reasons = (
    adsl.filter(pl.col("DCREASCD").ne("Completed"))
    .group_by("TRT01P", "DCREASCD")
    .agg(pl.len().alias("n"))
    .join(n_rand, on="TRT01P", suffix="_total")
    .with_columns((100 * pl.col("n") / pl.col("n_total")).round(1).alias("pct"))
    .sort("DCREASCD", "TRT01P")
)

4.4 Build Table Data

def format_row(label, stats_df=None, reason=None):
    """Build one display row: the label followed by an (n, pct) pair per treatment.

    Args:
        label: text for the first cell of the row.
        stats_df: DataFrame with "TRT01P", "n" and "pct" columns (plus
            "DCREASCD" when ``reason`` is used). When None, the row shows the
            n_rand count for each treatment with an empty percent cell.
        reason: optional DCREASCD value; when truthy, only rows with that
            value are considered.

    Returns:
        A list of strings: ``[label, n_1, pct_1, n_2, pct_2, ...]`` following
        the order of ``treatments``.
    """
    cells = [label]

    for arm in treatments:
        arm_match = pl.col("TRT01P") == arm

        # No stats frame: population row, count only, blank percent cell.
        if stats_df is None:
            total = n_rand.filter(arm_match)["n"][0]
            cells += [str(total), ""]
            continue

        selector = arm_match & (pl.col("DCREASCD") == reason) if reason else arm_match
        hits = stats_df.filter(selector)

        if len(hits) == 0:
            # Combination absent from the stats frame: show an explicit zero.
            cells += ["0", "(0.0%)"]
        else:
            cells += [str(hits["n"][0]), f"({hits['pct'][0]:.1f}%)"]

    return cells

# Summary rows first: population size, completers, all discontinuations.
table_data = [
    format_row("Participants in population"),
    format_row("Completed", stats["completed"]),
    format_row("Discontinued", stats["discontinued"]),
]

# Then one indented row per distinct discontinuation reason, in sorted order.
table_data.extend(
    format_row(f"    {reason}", disc_reasons, reason)
    for reason in disc_reasons["DCREASCD"].unique().sort()
)

# One unnamed label column followed by an "n" and a "(%)" column per treatment.
col_headers = [""] + [
    f"{trt}_{suffix}" for trt in treatments for suffix in ("n", "(%)")
]
df_disp = pl.DataFrame(table_data, schema=col_headers, orient="row")

df_disp

4.5 Generate RTF Output

# Assemble the RTF document from df_disp: a title, two column-header rows,
# the table body and a source line.
doc_disp = rtf.RTFDocument(
    df=df_disp,
    rtf_title=rtf.RTFTitle(text=["Disposition of Participants"]),
    rtf_column_header=[
        # First header row: blank label cell plus one cell per treatment
        # (relative widths 3 : 2 : 2 : 2).
        rtf.RTFColumnHeader(
            text=[""] + treatments,
            col_rel_width=[3] + [2] * 3,
            text_justification=["l"] + ["c"] * 3,
        ),
        # Second header row: an "n" / "(%)" pair for each treatment; each
        # treatment's width of 2 above is split into two cells of width 1.
        rtf.RTFColumnHeader(
            text=["", "n", "(%)", "n", "(%)", "n", "(%)"],
            col_rel_width=[3] + [1] * 6,
            text_justification=["l"] + ["c"] * 6,
            border_top=[""] + ["single"] * 6,
            # NOTE(review): presumably "" means no border, so a left border is
            # set on the label cell and each "n" cell but not on the "(%)"
            # cells - confirm against the rtflite documentation.
            border_left=["single"] + ["single", ""] * 3
        )
    ],
    # Body uses the same widths, justification and left-border pattern as the
    # second header row.
    rtf_body=rtf.RTFBody(
        col_rel_width=[3] + [1] * 6,
        text_justification=["l"] + ["c"] * 6,
        border_left=["single"] + ["single", ""] * 3
    ),
    rtf_source=rtf.RTFSource(text=["Source: ADSL dataset"])
)

# Path is relative to the current working directory.
# NOTE(review): assumes ../rtf already exists - confirm.
doc_disp.write_rtf("../rtf/tlf_disposition.rtf")