Baseline characteristics tables summarize the demographic and clinical characteristics of study participants at enrollment. Following ICH E3 guidance, these tables are essential for understanding the study population and assessing comparability between treatment groups.
This tutorial shows you how to create a baseline characteristics table using Python’s rtflite package.
4.2 Step 1: Load Data
We start by loading the Subject-level Analysis Dataset (ADSL) and filtering to the safety population.
We’ll build the table row by row following the standard baseline table format.
# Treatment arms in the order they appear as table columns. Defined once so
# every row is built against the same column order.
TREATMENT_ARMS = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]


def get_value(df, treatment, default="0 (0.0%)"):
    """Return the formatted statistic for one treatment group.

    Args:
        df: Polars DataFrame with a ``TRT01P`` column; the statistic is read
            from its last column.
        treatment: Value of ``TRT01P`` to look up.
        default: Text returned when ``df`` has no row for ``treatment``.
            The default is the zero-count cell used by categorical rows;
            pass ``""`` for rows where a missing group should stay blank.

    Returns:
        The first matching value of the last column, or ``default`` when no
        row matches.
    """
    result = df.filter(pl.col("TRT01P") == treatment)
    if result.height == 0:
        return default
    return result[result.columns[-1]][0]


# Build the baseline table structure: one list per output row, with the row
# label first and one cell per treatment arm after it.
table_rows = []

# Age section (header row has no statistics)
table_rows.append(["Age (years)", "", "", ""])

# Age Mean (SD) row. A missing arm is left blank rather than shown as a
# zero count, which would be meaningless for a continuous summary.
age_mean_row = [" Mean (SD)"] + [
    get_value(age_formatted.select(["TRT01P", "mean_sd"]), trt, default="")
    for trt in TREATMENT_ARMS
]
table_rows.append(age_mean_row)

# Age Median [Min, Max] row, blank when an arm is missing (same reason).
age_median_row = [" Median [Min, Max]"] + [
    get_value(age_formatted.select(["TRT01P", "median_range"]), trt, default="")
    for trt in TREATMENT_ARMS
]
table_rows.append(age_median_row)

# Sex section: categories with no participants in an arm show "0 (0.0%)".
table_rows.append(["Sex", "", "", ""])
for sex_cat in ["Female", "Male"]:
    sex_data = sex_formatted.filter(pl.col("SEX") == sex_cat)
    sex_row = [f" {sex_cat}"] + [get_value(sex_data, trt) for trt in TREATMENT_ARMS]
    table_rows.append(sex_row)

# Race section: same zero-count fallback as the sex section.
table_rows.append(["Race", "", "", ""])
for race_cat in ["White", "Black Or African American", "American Indian Or Alaska Native"]:
    race_data = race_formatted.filter(pl.col("RACE") == race_cat)
    race_row = [f" {race_cat}"] + [get_value(race_data, trt) for trt in TREATMENT_ARMS]
    table_rows.append(race_row)

# Create DataFrame from table rows
baseline_table = pl.DataFrame(
    table_rows,
    schema=["Characteristic", "Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"],
    orient="row",
)
baseline_table
shape: (10, 4)

| Characteristic | Placebo | Xanomeline Low Dose | Xanomeline High Dose |
| --- | --- | --- | --- |
| str | str | str | str |
| "Age (years)" | "" | "" | "" |
| " Mean (SD)" | "75.2 (8.59)" | "75.7 (8.29)" | "74.4 (7.89)" |
| " Median [Min, Max]" | "76.0 [52.0, 89.0]" | "77.5 [51.0, 88.0]" | "76.0 [56.0, 88.0]" |
| "Sex" | "" | "" | "" |
| " Female" | "53 (61.6%)" | "50 (59.5%)" | "40 (47.6%)" |
| " Male" | "33 (38.4%)" | "34 (40.5%)" | "44 (52.4%)" |
| "Race" | "" | "" | "" |
| " White" | "78 (90.7%)" | "78 (92.9%)" | "74 (88.1%)" |
| " Black Or African American" | "8 (9.3%)" | "6 (7.1%)" | "9 (10.7%)" |
| " American Indian Or Alaska Na… | "0 (0.0%)" | "0 (0.0%)" | "1 (1.2%)" |
4.6 Step 5: Generate Publication-Ready Output
Finally, we format the baseline table for regulatory submission using the rtflite package.
# Count participants per planned treatment arm; the counts feed the
# "(N=...)" line of each column header.
treatment_n = adsl.group_by("TRT01P").len().sort("TRT01P")
n_placebo, n_low, n_high = (
    treatment_n.filter(pl.col("TRT01P") == arm)["len"][0]
    for arm in ("Placebo", "Xanomeline Low Dose", "Xanomeline High Dose")
)

# Assemble the RTF components one at a time, then combine them.
baseline_title = rtf.RTFTitle(
    text=[
        "Baseline Characteristics of Participants",
        "(All Participants Randomized)",
    ]
)

# First column is left-justified and wider; treatment columns are centered.
baseline_header = rtf.RTFColumnHeader(
    text=[
        "Characteristic",
        f"Placebo\n(N={n_placebo})",
        f"Xanomeline Low Dose\n(N={n_low})",
        f"Xanomeline High Dose\n(N={n_high})",
    ],
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

# Body uses the same justification and widths as the header.
baseline_body = rtf.RTFBody(
    text_justification=["l", "c", "c", "c"],
    col_rel_width=[3, 2, 2, 2],
)

baseline_source = rtf.RTFSource(text=["Source: ADSL dataset"])

doc_baseline = rtf.RTFDocument(
    df=baseline_table,
    rtf_title=baseline_title,
    rtf_column_header=baseline_header,
    rtf_body=baseline_body,
    rtf_source=baseline_source,
)

# Save as RTF for submission
doc_baseline.write_rtf("rtf/tlf_baseline.rtf")