import polars as pl
import rtflite as rtf
from importlib.resources import files
# Locate the ADSL example dataset that ships inside the rtflite package.
data_path = files("rtflite.data").joinpath("adsl.parquet")
adsl = pl.read_parquet(data_path)

# Treatment arms, in the column order used when building the table rows.
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
4 Disposition of Participants Table
This article demonstrates how to create a disposition table following ICH E3 guidance using rtflite, based on the R4CSR example.
4.1 Overview
The disposition table summarizes the flow of participants through the study, including:

- Number of participants randomized
- Number who discontinued and reasons for discontinuation
- Number who completed the study
4.2 Setup
4.3 Calculate Disposition Statistics
# Number of ADSL rows per TRT01P value; joined in below as the percentage
# denominator ("n_total").
n_rand = (
    adsl
    .group_by("TRT01P")
    .agg(n=pl.len())
    .sort("TRT01P")
)
def calc_stats(df, filter_expr=None):
    """Count rows per TRT01P value and express them as a percentage of n_rand.

    Args:
        df: Polars DataFrame containing a "TRT01P" column.
        filter_expr: Optional Polars expression; when given, only rows of
            ``df`` matching it are counted.

    Returns:
        A DataFrame sorted by "TRT01P" with columns "TRT01P", "n" (row
        count), "n_total" (the matching count from the module-level
        ``n_rand``) and "pct" (100 * n / n_total, rounded to one decimal).
        A TRT01P value with no rows left after filtering has no row in the
        result.
    """
    if filter_expr is not None:
        df = df.filter(filter_expr)

    counts = (
        df
        .group_by("TRT01P")
        .agg(n=pl.len())
        # n_rand also has an "n" column; the suffix renames it to "n_total".
        .join(n_rand, on="TRT01P", suffix="_total")
        .with_columns(
            pct=(100 * pl.col("n") / pl.col("n_total")).round(1)
        )
        .sort("TRT01P")
    )
    return counts
# Per-arm counts and percentages for the two top-level disposition categories.
stats = {
    "completed": calc_stats(adsl, pl.col("DCREASCD") == "Completed"),
    "discontinued": calc_stats(adsl, pl.col("DCREASCD") != "Completed"),
}
# Counts and percentages per (TRT01P, DCREASCD) pair for every DCREASCD value
# other than "Completed"; the denominator is the per-arm total from n_rand.
disc_reasons = (
    adsl
    .filter(pl.col("DCREASCD") != "Completed")
    .group_by(["TRT01P", "DCREASCD"])
    .agg(n=pl.len())
    .join(n_rand, on="TRT01P", suffix="_total")
    .with_columns(
        pct=(100 * pl.col("n") / pl.col("n_total")).round(1)
    )
    .sort(["DCREASCD", "TRT01P"])
)
4.4 Build Table Data
def format_row(label, stats_df=None, reason=None):
    """Build one display row: a label followed by an (n, pct) pair per arm.

    Arms are taken from the module-level ``treatments`` list, in order.

    Args:
        label: Text for the first cell of the row.
        stats_df: DataFrame with "TRT01P", "n" and "pct" columns (and
            "DCREASCD" when ``reason`` is used). When None, the row shows
            the per-arm count from ``n_rand`` with an empty percent cell.
        reason: Optional "DCREASCD" value used to narrow ``stats_df``.

    Returns:
        A list of strings: ``[label, n, pct, n, pct, ...]``.
    """
    row = [label]

    for trt in treatments:
        if stats_df is None:  # For total participants row
            n = n_rand.filter(pl.col("TRT01P") == trt)["n"][0]
            row.extend([str(n), ""])
        else:
            # Filter for specific treatment (and reason if provided)
            filter_expr = pl.col("TRT01P") == trt
            if reason:
                filter_expr = filter_expr & (pl.col("DCREASCD") == reason)

            data = stats_df.filter(filter_expr)
            if len(data) > 0:
                row.extend([str(data["n"][0]), f"({data['pct'][0]:.1f}%)"])
            else:
                # No matching rows for this arm: show an explicit zero.
                row.extend(["0", "(0.0%)"])

    return row
# Summary rows first, then one row per discontinuation reason.
table_data = [
    format_row("Participants in population"),
    format_row("Completed", stats["completed"]),
    format_row("Discontinued", stats["discontinued"]),
]

for reason in disc_reasons["DCREASCD"].unique().sort():
    table_data.append(
        format_row(f" {reason}", disc_reasons, reason)
    )

# One label column followed by an "n" and a "(%)" column for each arm.
col_headers = [""] + [f"{trt}_{col}" for trt in treatments for col in ["n", "(%)"]]
df_disp = pl.DataFrame(table_data, schema=col_headers, orient="row")
df_disp
4.5 Generate RTF Output
doc_disp = rtf.RTFDocument(
    df=df_disp,
    rtf_title=rtf.RTFTitle(text=["Disposition of Participants"]),
    rtf_column_header=[
        # First header row: one cell per treatment arm (4 cells in total).
        rtf.RTFColumnHeader(
            text=[""] + treatments,
            col_rel_width=[3] + [2] * 3,
            text_justification=["l"] + ["c"] * 3,
        ),
        # Second header row: "n" and "(%)" under each arm (7 cells in total).
        rtf.RTFColumnHeader(
            text=["", "n", "(%)", "n", "(%)", "n", "(%)"],
            col_rel_width=[3] + [1] * 6,
            text_justification=["l"] + ["c"] * 6,
            border_top=[""] + ["single"] * 6,
            border_left=["single"] + ["single", ""] * 3,
        ),
    ],
    rtf_body=rtf.RTFBody(
        col_rel_width=[3] + [1] * 6,
        text_justification=["l"] + ["c"] * 6,
        border_left=["single"] + ["single", ""] * 3,
    ),
    rtf_source=rtf.RTFSource(text=["Source: ADSL dataset"]),
)

doc_disp.write_rtf("../rtf/tlf_disposition.rtf")