adsl = pl.read_parquet("data/adsl.parquet")
6 Study population
Create study population summary tables to document participant counts across different analysis populations. Learn to use population flags in ADSL data and generate regulatory-compliant population tables with rtflite.
6.1 Overview
Clinical trials define multiple analysis populations based on different inclusion criteria. Following ICH E3 guidance, regulatory submissions must clearly document the number of participants in each analysis population to support the validity of statistical analyses.
The key analysis populations typically include:
- All Randomized: Total participants who entered the study
- Intent-to-Treat (ITT): Participants included in the primary efficacy analysis
- Efficacy Population: Participants who meet specific criteria for efficacy evaluation
- Safety Population: Participants who received at least one dose of study treatment
This tutorial shows you how to create a population summary table using Python’s rtflite package.
6.2 Step 1: Load data
We start by loading the Subject-level Analysis Dataset (ADSL), which contains population flags for each participant.
Let’s examine the key population flag variables we’ll use:
- USUBJID: Unique participant identifier
- TRT01P: Planned treatment group
- ITTFL: Intent-to-treat population flag (Y/N)
- EFFFL: Efficacy population flag (Y/N)
- SAFFL: Safety population flag (Y/N)
adsl.select(["USUBJID", "TRT01P", "ITTFL", "EFFFL", "SAFFL"])
| USUBJID | TRT01P | ITTFL | EFFFL | SAFFL |
|---|---|---|---|---|
| str | str | str | str | str |
| "01-701-1015" | "Placebo" | "Y" | "Y" | "Y" |
| "01-701-1023" | "Placebo" | "Y" | "Y" | "Y" |
| "01-701-1028" | "Xanomeline High Dose" | "Y" | "Y" | "Y" |
| … | … | … | … | … |
| "01-718-1371" | "Xanomeline High Dose" | "Y" | "Y" | "Y" |
| "01-718-1427" | "Xanomeline High Dose" | "Y" | "Y" | "Y" |
6.3 Step 2: Calculate treatment group totals
First, we calculate the total number of randomized participants in each treatment group, which will serve as the denominator for percentage calculations.
# Per-arm denominators: one row per planned treatment group (TRT01P)
# holding the number of ADSL rows in that group.
totals = (
    adsl
    .group_by("TRT01P")
    .agg(pl.len().alias("total"))
)
totals
| TRT01P | total |
|---|---|
| str | u32 |
| "Xanomeline High Dose" | 84 |
| "Placebo" | 86 |
| "Xanomeline Low Dose" | 84 |
6.4 Step 3: Define helper function
We create a reusable function to count participants by treatment group for any population subset.
def count_by_treatment(data, population_name):
"""Count participants by treatment group and add population label"""
return data.group_by("TRT01P").agg(
n = pl.len()
).with_columns(
population = pl.lit(population_name)
)6.5 Step 4: Count each population
Now we calculate participant counts for each analysis population.
6.5.1 All randomized participants
# Unfiltered ADSL: every row counts toward the overall population.
# The label text must stay exactly as written; the display-formatting
# step compares against this literal to pick the "n only" format.
pop_all = count_by_treatment(adsl, "Participants in population")
pop_all
| TRT01P | n | population |
|---|---|---|
| str | u32 | str |
| "Placebo" | 86 | "Participants in population" |
| "Xanomeline Low Dose" | 84 | "Participants in population" |
| "Xanomeline High Dose" | 84 | "Participants in population" |
6.5.2 Intent-to-treat population
# Keep only rows flagged "Y" for intent-to-treat, then count per treatment group.
adsl_itt = adsl.filter(pl.col("ITTFL").eq("Y"))
pop_itt = count_by_treatment(adsl_itt, "Participants included in ITT population")
pop_itt
| TRT01P | n | population |
|---|---|---|
| str | u32 | str |
| "Xanomeline Low Dose" | 84 | "Participants included in ITT p… |
| "Xanomeline High Dose" | 84 | "Participants included in ITT p… |
| "Placebo" | 86 | "Participants included in ITT p… |
6.5.3 Efficacy population
# Keep only rows flagged "Y" for the efficacy population, then count per treatment group.
adsl_eff = adsl.filter(pl.col("EFFFL").eq("Y"))
pop_eff = count_by_treatment(adsl_eff, "Participants included in efficacy population")
pop_eff
| TRT01P | n | population |
|---|---|---|
| str | u32 | str |
| "Xanomeline High Dose" | 74 | "Participants included in effic… |
| "Xanomeline Low Dose" | 81 | "Participants included in effic… |
| "Placebo" | 79 | "Participants included in effic… |
6.5.4 Safety population
# Keep only rows flagged "Y" for the safety population, then count per treatment group.
adsl_saf = adsl.filter(pl.col("SAFFL").eq("Y"))
pop_saf = count_by_treatment(adsl_saf, "Participants included in safety population")
pop_saf
| TRT01P | n | population |
|---|---|---|
| str | u32 | str |
| "Xanomeline High Dose" | 84 | "Participants included in safet… |
| "Xanomeline Low Dose" | 84 | "Participants included in safet… |
| "Placebo" | 86 | "Participants included in safet… |
6.6 Step 5: Combine all populations
We stack all population counts together into a single dataset.
# Stack the four per-population count tables row-wise. The list order here
# becomes the row order of the final table, because the later pivot is run
# with maintain_order=True.
all_populations = pl.concat(
    [pop_all, pop_itt, pop_eff, pop_saf],
    how="vertical",
)
all_populations
| TRT01P | n | population |
|---|---|---|
| str | u32 | str |
| "Placebo" | 86 | "Participants in population" |
| "Xanomeline Low Dose" | 84 | "Participants in population" |
| "Xanomeline High Dose" | 84 | "Participants in population" |
| … | … | … |
| "Xanomeline Low Dose" | 84 | "Participants included in safet… |
| "Placebo" | 86 | "Participants included in safet… |
6.7 Step 6: Calculate percentages
We join with the total counts and calculate what percentage each population represents of the total randomized participants.
# Attach each treatment group's denominator, then express n as a
# percentage of it, rounded to one decimal place.
stats_with_pct = (
    all_populations
    .join(totals, on="TRT01P", how="inner")
    .with_columns(
        (100.0 * pl.col("n") / pl.col("total")).round(1).alias("pct")
    )
)
stats_with_pct
| TRT01P | n | population | total | pct |
|---|---|---|---|---|
| str | u32 | str | u32 | f64 |
| "Placebo" | 86 | "Participants in population" | 86 | 100.0 |
| "Xanomeline Low Dose" | 84 | "Participants in population" | 84 | 100.0 |
| "Xanomeline High Dose" | 84 | "Participants in population" | 84 | 100.0 |
| … | … | … | … | … |
| "Xanomeline Low Dose" | 84 | "Participants included in safet… | 84 | 100.0 |
| "Placebo" | 86 | "Participants included in safet… | 86 | 100.0 |
6.8 Step 7: Format display values
For the final table, we format the display text. The total randomized count shows just “N”, while subset populations show “N (%)”.
# Build the cell text shown in the table:
#   - overall row ("Participants in population"): just the count, e.g. "86"
#   - every other population: count plus percentage, e.g. "79 (91.9)"
formatted_stats = stats_with_pct.with_columns(
    pl.when(pl.col("population") == "Participants in population")
    .then(pl.col("n").cast(str))
    .otherwise(
        pl.concat_str(
            pl.col("n").cast(str),
            pl.lit(" ("),
            pl.col("pct").round(1).cast(str),
            pl.lit(")"),
        )
    )
    .alias("display")
)
formatted_stats
| TRT01P | n | population | total | pct | display |
|---|---|---|---|---|---|
| str | u32 | str | u32 | f64 | str |
| "Placebo" | 86 | "Participants in population" | 86 | 100.0 | "86" |
| "Xanomeline Low Dose" | 84 | "Participants in population" | 84 | 100.0 | "84" |
| "Xanomeline High Dose" | 84 | "Participants in population" | 84 | 100.0 | "84" |
| … | … | … | … | … | … |
| "Xanomeline Low Dose" | 84 | "Participants included in safet… | 84 | 100.0 | "84 (100.0)" |
| "Placebo" | 86 | "Participants included in safet… | 86 | 100.0 | "86 (100.0)" |
6.9 Step 8: Create final table
We reshape the data from long format (rows for each treatment-population combination) to wide format (columns for each treatment group).
# Long -> wide: one row per population, one display-text column per treatment group.
# The explicit select pins the column order (Placebo, Low, High) no matter
# which order the treatment groups appeared in upstream.
df_overview = formatted_stats.pivot(
    on="TRT01P",
    index="population",
    values="display",
    maintain_order=True,
).select(
    "population",
    "Placebo",
    "Xanomeline Low Dose",
    "Xanomeline High Dose",
)
df_overview
| population | Placebo | Xanomeline Low Dose | Xanomeline High Dose |
|---|---|---|---|
| str | str | str | str |
| "Participants in population" | "86" | "84" | "84" |
| "Participants included in ITT p… | "86 (100.0)" | "84 (100.0)" | "84 (100.0)" |
| "Participants included in effic… | "79 (91.9)" | "81 (96.4)" | "74 (88.1)" |
| "Participants included in safet… | "86 (100.0)" | "84 (100.0)" | "84 (100.0)" |
6.10 Step 9: Generate publication-ready output
Finally, we format the population table for regulatory submission using the rtflite package.
# Assemble the RTF document: title lines, column header, body layout and
# a source footnote around the wide population table.
doc_overview = rtf.RTFDocument(
    df=df_overview,
    rtf_title=rtf.RTFTitle(
        text=["Analysis Population", "All Participants Randomized"],
    ),
    rtf_column_header=rtf.RTFColumnHeader(
        # First header cell is blank: it sits above the population-label column.
        text=[
            "",
            "Placebo\nn (%)",
            "Xanomeline Low Dose\nn (%)",
            "Xanomeline High Dose\nn (%)",
        ],
        # Header and body use the same 4:2:2:2 relative widths and the same
        # justification codes (presumably "l" = left, "c" = center; confirm
        # against the rtflite docs) so the columns line up.
        col_rel_width=[4, 2, 2, 2],
        text_justification=["l", "c", "c", "c"],
    ),
    rtf_body=rtf.RTFBody(
        col_rel_width=[4, 2, 2, 2],
        text_justification=["l", "c", "c", "c"],
    ),
    rtf_source=rtf.RTFSource(text=["Source: ADSL dataset"]),
)
doc_overview.write_rtf("rtf/tlf_population.rtf")
rtf/tlf_population.rtf
PosixPath('pdf/tlf_population.pdf')