import polars as pl
import rtflite as rtf
from importlib.resources import files
3 Baseline Characteristics Table
This article demonstrates how to create a baseline characteristics table for clinical study reports using rtflite.
3.1 Overview
Baseline characteristics tables summarize demographic and clinical characteristics of study participants at enrollment. These tables are essential for understanding the study population and assessing comparability between treatment groups.
3.2 Imports
3.3 Data Preparation
# Locate the ADSL dataset bundled with the rtflite package.
data_path = files("rtflite.data").joinpath("adsl.parquet")

# Keep only rows flagged SAFFL == "Y" and the columns the table needs, then
# recode SEX to full labels and title-case RACE for display.
adsl_baseline = (
    pl.read_parquet(data_path)
    .filter(pl.col("SAFFL") == "Y")
    .select(["USUBJID", "TRT01P", "AGE", "SEX", "RACE"])
    .with_columns(
        [
            pl.col("SEX").replace({"F": "Female", "M": "Male"}),
            pl.col("RACE").str.to_titlecase(),
        ]
    )
)
3.4 Statistics Function
def get_statistics(df, var, is_continuous=False):
    """Summarize ``var`` by treatment group (``TRT01P``) and overall.

    Args:
        df: polars DataFrame containing ``TRT01P`` and the column ``var``.
        var: Name of the column to summarize.
        is_continuous: When True compute numeric summaries; otherwise
            compute counts and percentages per category.

    Returns:
        A ``(by_treatment, overall)`` pair.

        * Continuous: ``by_treatment`` is a DataFrame with one row per
          ``TRT01P`` value and columns ``mean``, ``sd``, ``median``,
          ``min``, ``max``; ``overall`` is a single row tuple in that
          same order.
        * Categorical: ``by_treatment`` is a DataFrame with one row per
          (``TRT01P``, ``var``) combination and columns ``len``,
          ``total`` and ``formatted``; ``overall`` is a DataFrame with
          one row per ``var`` value and columns ``len`` and
          ``formatted``. ``formatted`` is the string ``"n (pct%)"``.
    """
    if is_continuous:
        # Mean and SD are rounded to one decimal; median/min/max are left
        # as computed.
        expr = [
            pl.col(var).mean().round(1).alias("mean"),
            pl.col(var).std().round(1).alias("sd"),
            pl.col(var).median().alias("median"),
            pl.col(var).min().alias("min"),
            pl.col(var).max().alias("max"),
        ]

        # Continuous statistics by treatment
        by_treatment = df.group_by("TRT01P").agg(expr)

        # Overall statistics
        overall = df.select(expr).row(0)

        return by_treatment, overall
    else:
        # Categorical counts and percentages
        total_n = df.height

        by_treatment = (
            df.group_by(["TRT01P", var])
            .len()
            # Attach each treatment's total so the percentage denominator
            # is the group size rather than the whole population.
            .join(
                df.group_by("TRT01P").len().rename({"len": "total"}),
                on="TRT01P",
            )
            .with_columns(
                pl.format(
                    "{} ({}%)",
                    pl.col("len"),
                    (100 * pl.col("len") / pl.col("total")).round(1),
                ).alias("formatted")
            )
        )

        # Overall counts
        overall = (
            df.group_by(var)
            .len()
            .with_columns(
                pl.format(
                    "{} ({}%)",
                    pl.col("len"),
                    (100 * pl.col("len") / total_n).round(1),
                ).alias("formatted")
            )
        )

        return by_treatment, overall
3.5 Build Table Data
# Treatment groups (in display order) and counts
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]

# Map each TRT01P value to its number of rows; each (TRT01P, len) row from
# the grouped frame becomes one key/value pair.
treatment_counts = dict(
    adsl_baseline.group_by("TRT01P").len().iter_rows()
)
def create_variable_rows(df, var_name, categories=None, is_continuous=False):
    """Build the display rows for one variable of the baseline table.

    Each row is a list of five strings: a label followed by one cell per
    entry in the module-level ``treatments`` list and a final overall
    cell. The first row returned is a header row holding only
    ``var_name``.

    Args:
        df: polars DataFrame passed through to ``get_statistics``.
        var_name: Column to summarize; also used as the header label.
        categories: Category values to emit, in display order. Required
            when ``is_continuous`` is False.
        is_continuous: When True emit "Mean (SD)" and
            "Median [Min, Max]" rows; otherwise one row per category.

    Returns:
        List of rows (each a list of strings).

    Raises:
        ValueError: If ``is_continuous`` is False and ``categories`` is
            None.
    """
    if not is_continuous and categories is None:
        # Fail early with a clear message instead of the opaque TypeError
        # that iterating over None would raise further down.
        raise ValueError(
            "categories must be provided when is_continuous is False"
        )

    rows = [[var_name, "", "", "", ""]]

    by_treatment, overall_stats = get_statistics(df, var_name, is_continuous)

    if is_continuous:
        # Mean (SD) row
        row = [" Mean (SD)"]
        for trt in treatments:
            trt_stats = by_treatment.filter(pl.col("TRT01P") == trt)
            if trt_stats.height > 0:
                mean, sd = trt_stats.select(["mean", "sd"]).row(0)
                row.append(f"{mean} ({sd})")
            else:
                # Treatment absent from the data: leave the cell blank.
                row.append("")
        # overall_stats is a tuple ordered (mean, sd, median, min, max).
        row.append(f"{overall_stats[0]} ({overall_stats[1]})")
        rows.append(row)

        # Median [Min, Max] row
        row = [" Median [Min, Max]"]
        for trt in treatments:
            trt_stats = by_treatment.filter(pl.col("TRT01P") == trt)
            if trt_stats.height > 0:
                median, min_val, max_val = trt_stats.select(
                    ["median", "min", "max"]
                ).row(0)
                row.append(f"{median} [{min_val}, {max_val}]")
            else:
                row.append("")
        row.append(
            f"{overall_stats[2]} [{overall_stats[3]}, {overall_stats[4]}]"
        )
        rows.append(row)
    else:
        # Categorical variable rows
        for cat in categories:
            row = [f" {cat}"]

            for trt in treatments:
                trt_data = by_treatment.filter(
                    (pl.col("TRT01P") == trt) & (pl.col(var_name) == cat)
                )
                if trt_data.height > 0:
                    row.append(trt_data["formatted"][0])
                else:
                    # Category not observed in this treatment group.
                    row.append("0 (0.0%)")

            # Overall column
            overall_data = overall_stats.filter(pl.col(var_name) == cat)
            if overall_data.height > 0:
                row.append(overall_data["formatted"][0])
            else:
                row.append("0 (0.0%)")

            rows.append(row)

    return rows
# Build complete table: one header row plus detail rows per variable.
table_data = []
table_data.extend(create_variable_rows(adsl_baseline, "SEX", ["Female", "Male"]))
table_data.extend(create_variable_rows(adsl_baseline, "AGE", is_continuous=True))
table_data.extend(
    create_variable_rows(
        adsl_baseline,
        "RACE",
        ["Black Or African American", "White", "American Indian Or Alaska Native"],
    )
)

# Each inner list is one table row, hence orient="row".
df_baseline = pl.DataFrame(table_data, orient="row")

# Bare expression: displays the frame when run as a notebook cell.
df_baseline
3.6 Create RTF Output
# Column headers with N counts: a blank label column, one column per
# treatment, and a final overall column.
col_headers = (
    [""]
    + [f"{trt}\n(N={treatment_counts[trt]})" for trt in treatments]
    + [f"Overall\n(N={adsl_baseline.height})"]
)

doc_baseline = rtf.RTFDocument(
    df=df_baseline,
    rtf_title=rtf.RTFTitle(
        text=[
            "Baseline Characteristics of Participants",
            "(All Participants Randomized)",
        ]
    ),
    rtf_column_header=rtf.RTFColumnHeader(
        text=col_headers,
        # Left-justify the label column, center the four data columns.
        text_justification=["l"] + ["c"] * 4,
    ),
    rtf_body=rtf.RTFBody(
        # Label column is given twice the relative width of a data column.
        col_rel_width=[2] + [1] * 4,
        text_justification=["l"] + ["c"] * 4,
    ),
)

doc_baseline.write_rtf("../rtf/tlf_baseline.rtf")