import polars as pl
import rtflite as rtf
9 Specific adverse events
Create detailed adverse event tables organized by System Organ Class and Preferred Term to support safety evaluation. Learn to process ADAE data with hierarchical grouping using Polars and generate regulatory-compliant AE listings with rtflite.
9.1 Overview
Specific adverse events tables provide detailed safety information organized by System Organ Class (SOC) and Preferred Term (PT) following the Medical Dictionary for Regulatory Activities (MedDRA) hierarchy. Following ICH E3 guidance, these tables are essential components of clinical study reports that present participant-level adverse event data across treatment groups.
Key features of specific AE tables include:
- Hierarchical structure: SOC categories with nested specific AE terms
- Participant counts: Number of participants experiencing each AE type
- Treatment comparison: Side-by-side counts across treatment groups
- MedDRA compliance: Standardized medical terminology for regulatory submissions
This tutorial demonstrates how to create a regulatory-compliant specific adverse events table using Python’s rtflite package.
9.2 Setup
polars.config.Config
# Load the two input datasets from local Parquet files (paths are relative
# to the working directory): ADSL is used below to define the safety
# population, ADAE holds the adverse event records.
adsl = pl.read_parquet("data/adsl.parquet")
adae = pl.read_parquet("data/adae.parquet")
treatments = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"]
9.3 Step 1: Load and explore data
We start by examining the adverse events data structure and understanding the MedDRA hierarchy.
# Display key variables in ADAE dataset: participant ID, treatment, the two
# MedDRA term columns, and the AESEV / AESER columns
adae_vars = adae.select(
    "USUBJID",
    "TRTA",
    "AEBODSYS",
    "AEDECOD",
    "AESEV",
    "AESER",
)
# Key ADAE variables
adae_vars
| USUBJID | TRTA | AEBODSYS | AEDECOD | AESEV | AESER |
|---|---|---|---|---|---|
| str | str | str | str | str | str |
| "01-701-1015" | "Placebo" | "GENERAL DISORDERS AND ADMINIST… | "APPLICATION SITE ERYTHEMA" | "MILD" | "N" |
| "01-701-1015" | "Placebo" | "GENERAL DISORDERS AND ADMINIST… | "APPLICATION SITE PRURITUS" | "MILD" | "N" |
| "01-701-1015" | "Placebo" | "GASTROINTESTINAL DISORDERS" | "DIARRHOEA" | "MILD" | "N" |
| … | … | … | … | … | … |
| "01-718-1427" | "Xanomeline High Dose" | "METABOLISM AND NUTRITION DISOR… | "DECREASED APPETITE" | "MODERATE" | "N" |
| "01-718-1427" | "Xanomeline High Dose" | "GASTROINTESTINAL DISORDERS" | "NAUSEA" | "MODERATE" | "N" |
# Examine the MedDRA hierarchy structure
# For every System Organ Class (SOC) in the data: number of distinct
# participants and number of event records, largest participant count first
soc_summary = (
    adae
    .group_by("AEBODSYS")
    .agg(
        [
            pl.col("USUBJID").n_unique().alias("n_participants"),
            pl.len().alias("n_events"),
        ]
    )
    .sort("n_participants", descending=True)
)
soc_summary
| AEBODSYS | n_participants | n_events |
|---|---|---|
| str | u32 | u32 |
| "GENERAL DISORDERS AND ADMINIST… | 108 | 292 |
| "SKIN AND SUBCUTANEOUS TISSUE D… | 105 | 276 |
| "NERVOUS SYSTEM DISORDERS" | 59 | 101 |
| … | … | … |
| "HEPATOBILIARY DISORDERS" | 1 | 1 |
| "SOCIAL CIRCUMSTANCES" | 1 | 1 |
9.4 Step 2: Prepare analysis population
Following regulatory standards, we focus on the safety analysis population.
# Define safety population: participants flagged SAFFL == "Y", keeping only
# the participant ID and the TRT01A treatment column
adsl_safety = (
    adsl
    .filter(pl.col("SAFFL") == "Y")
    .select("USUBJID", "TRT01A")
)
# Safety population size
adsl_safety.height
# Get safety population counts by treatment, ordered by treatment name
pop_counts = (
    adsl_safety
    .group_by("TRT01A")
    .agg(pl.len().alias("N"))
    .sort("TRT01A")
)
# Safety population by treatment
pop_counts
| TRT01A | N |
|---|---|
| str | u32 |
| "Placebo" | 86 |
| "Xanomeline High Dose" | 84 |
| "Xanomeline Low Dose" | 84 |
# Filter adverse events to safety population: the inner join drops AE
# records whose USUBJID is not in adsl_safety and brings in its TRT01A column
adae_safety = adae.join(
    adsl_safety,
    on="USUBJID",
    how="inner",
)
# AE records in safety population
adae_safety.height
1191
9.5 Step 3: Data preparation and standardization
We standardize the adverse event terms and prepare the hierarchical data structure.
# Standardize AE term formatting for consistency (title case), then count
# distinct participants for each treatment / SOC / preferred-term combination
ae_counts = (
    adae_safety
    .with_columns(
        AEDECOD_STD=pl.col("AEDECOD").str.to_titlecase(),
        AEBODSYS_STD=pl.col("AEBODSYS").str.to_titlecase(),
    )
    .group_by("TRT01A", "AEBODSYS_STD", "AEDECOD_STD")
    .agg(pl.col("USUBJID").n_unique().alias("n"))
    .sort("AEBODSYS_STD", "AEDECOD_STD", "TRT01A")
)
# Sample of prepared AE counts
ae_counts
| TRT01A | AEBODSYS_STD | AEDECOD_STD | n |
|---|---|---|---|
| str | str | str | u32 |
| "Placebo" | "Cardiac Disorders" | "Atrial Fibrillation" | 1 |
| "Xanomeline High Dose" | "Cardiac Disorders" | "Atrial Fibrillation" | 3 |
| "Xanomeline Low Dose" | "Cardiac Disorders" | "Atrial Fibrillation" | 1 |
| … | … | … | … |
| "Placebo" | "Vascular Disorders" | "Orthostatic Hypotension" | 1 |
| "Xanomeline High Dose" | "Vascular Disorders" | "Wound Haemorrhage" | 1 |
9.6 Step 4: Build hierarchical table structure
We create a nested table structure with SOC headers and indented specific terms.
# Initialize table with population counts
# Read each arm's N once into a dict instead of filtering pop_counts per arm
pop_n = dict(pop_counts.select("TRT01A", "N").iter_rows())
n_arms = len(treatments)
table_data = [
    ["Participants in population"] + [str(pop_n[trt]) for trt in treatments],
    [""] * (n_arms + 1),  # Blank separator row
]

# Index participant counts by (SOC, term, treatment) so each table cell is a
# dictionary lookup rather than another filter over ae_counts
n_by_cell = {
    (soc_key, term_key, trt_key): n
    for soc_key, term_key, trt_key, n in ae_counts.select(
        "AEBODSYS_STD", "AEDECOD_STD", "TRT01A", "n"
    ).iter_rows()
}

# Build hierarchical structure: SOC -> Specific AE terms
for soc in ae_counts["AEBODSYS_STD"].unique().sort():
    # Add SOC header row (bold formatting will be applied later); the count
    # columns stay blank on header rows
    table_data.append([soc] + [""] * n_arms)
    # Get all AE terms within this SOC
    soc_terms = ae_counts.filter(pl.col("AEBODSYS_STD") == soc)["AEDECOD_STD"]
    # Add each specific AE term with counts
    for ae_term in soc_terms.unique().sort():
        row = [f" {ae_term}"]  # Indent specific terms
        # Add counts for each treatment group; a term/arm combination that
        # never occurs in ae_counts is reported as 0
        row.extend(
            str(n_by_cell.get((soc, ae_term, trt), 0)) for trt in treatments
        )
        table_data.append(row)
# Convert to Polars DataFrame: every inner list of table_data becomes one
# row; the first column holds the row label and is followed by one column
# per treatment
df_ae_specific = pl.DataFrame(
    data=table_data,
    schema=["", *treatments],
    orient="row",
)
# Final table structure
df_ae_specific.shape
df_ae_specific
| column_0 | Placebo | Xanomeline Low Dose | Xanomeline High Dose |
|---|---|---|---|
| str | str | str | str |
| "Participants in population" | "86" | "84" | "84" |
| "" | "" | "" | "" |
| "Cardiac Disorders" | "" | "" | "" |
| … | … | … | … |
| " Orthostatic Hypotension" | "1" | "0" | "0" |
| " Wound Haemorrhage" | "0" | "0" | "1" |
9.7 Step 5: Create regulatory-compliant RTF output
We format the table following regulatory submission standards with proper hierarchy and formatting.
# Create comprehensive RTF document

# Column header labels: arm name plus its safety-population N, generated
# from `treatments` so the header order always matches the data columns
arm_headers = [
    f"{trt}\\line (N={pop_counts.filter(pl.col('TRT01A') == trt)['N'][0]})"
    for trt in treatments
]
# One wide, left-aligned label column followed by one narrow, centered
# column per treatment; shared by the header and the body so they line up
col_widths = [4] + [1.5] * len(treatments)
col_justify = ["l"] + ["c"] * len(treatments)

doc_ae_specific = rtf.RTFDocument(
    df=df_ae_specific,
    rtf_title=rtf.RTFTitle(
        text=[
            "Adverse Events by System Organ Class and Preferred Term",
            "(Safety Analysis Set)",
        ]
    ),
    rtf_column_header=rtf.RTFColumnHeader(
        text=["System Organ Class\\line Preferred Term", *arm_headers],
        col_rel_width=col_widths,
        text_justification=col_justify,
        text_format="b",  # Bold headers
        border_bottom="single",
    ),
    rtf_body=rtf.RTFBody(
        col_rel_width=col_widths,
        text_justification=col_justify,
        # Apply bold formatting to SOC headers (rows without indentation)
        # NOTE(review): this predicate is also true for the first-column
        # cell "Participants in population" - confirm that row should be bold.
        # NOTE(review): confirm the installed rtflite version accepts a
        # callable for `text_font_style`.
        text_font_style=lambda df, i, j: "b" if j == 0 and not str(df[i, j]).startswith(" ") and str(df[i, j]) != "" else ""
    ),
    rtf_footnote=rtf.RTFFootnote(
        text=[
            "MedDRA version 25.0.",
            "Each participant is counted once within each preferred term and system organ class.",
            "Participants with multiple events in the same preferred term are counted only once for that term.",
        ]
    ),
    rtf_source=rtf.RTFSource(
        text=["Source: ADAE Analysis Dataset (Data cutoff: 01JAN2023)"]
    ),
)
# Generate RTF file
doc_ae_specific.write_rtf("rtf/tlf_ae_specific.rtf")
# RTF file created: rtf/tlf_ae_specific.rtf
rtf/tlf_ae_specific.rtf
PosixPath('pdf/tlf_ae_specific.pdf')