import polars as pl
import rtflite as rtf
import pandas as pd
import numpy as np
import statsmodels.formula.api as smf
from scipy import stats as scipy_stats
from importlib.resources import files
= pl.read_parquet(files("rtflite.data").joinpath("adsl.parquet"))
adsl = pl.read_parquet(files("rtflite.data").joinpath("tlf-adlbc.parquet"))
adlbc = ["Placebo", "Xanomeline Low Dose", "Xanomeline High Dose"] treatments
7 ANCOVA Efficacy Analysis
This article demonstrates how to create an ANCOVA efficacy table for glucose levels at Week 24 with LOCF imputation.
7.1 Setup
7.2 Prepare Analysis Data
# Clean data types and filter for efficacy population
= adlbc.with_columns(
adlbc_clean str).str.strip_chars() for c in ["USUBJID", "PARAMCD", "AVISIT", "TRTP"]]
[pl.col(c).cast(
)= adsl.filter(pl.col("EFFFL") == "Y").select(["USUBJID"])
adsl_eff = adlbc_clean.join(adsl_eff, on="USUBJID", how="inner")
adlbc_eff
# Apply LOCF for glucose data up to Week 24
= (
gluc_data filter((pl.col("PARAMCD") == "GLUC") & (pl.col("AVISITN") <= 24))
adlbc_eff."USUBJID", "AVISITN"])
.sort(["USUBJID")
.group_by(
.agg(["TRTP").first(),
pl.col("BASE").first(),
pl.col("AVAL").filter(pl.col("AVISITN") == 0).first().alias("Baseline"),
pl.col("AVAL").last().alias("Week 24")
pl.col(
])filter(pl.col("Baseline").is_not_null() & pl.col("Week 24").is_not_null())
."Week 24") - pl.col("Baseline")).alias("CHG"))
.with_columns((pl.col(
)
# Calculate descriptive statistics
= []
desc_stats for trt in treatments:
= gluc_data.filter(pl.col("TRTP") == trt)
trt_data = adlbc_eff.filter(
baseline_full "PARAMCD") == "GLUC") & (pl.col("AVISIT") == "Baseline") & (pl.col("TRTP") == trt)
(pl.col(
)
desc_stats.append({"Treatment": trt,
"N_Baseline": baseline_full.height,
"Baseline_Mean": baseline_full["AVAL"].mean() if baseline_full.height > 0 else np.nan,
"Baseline_SD": baseline_full["AVAL"].std() if baseline_full.height > 0 else np.nan,
"N_Week24": trt_data.height,
"Week24_Mean": trt_data["Week 24"].mean() if trt_data.height > 0 else np.nan,
"Week24_SD": trt_data["Week 24"].std() if trt_data.height > 0 else np.nan,
"N_Change": trt_data.height,
"Change_Mean": trt_data["CHG"].mean() if trt_data.height > 0 else np.nan,
"Change_SD": trt_data["CHG"].std() if trt_data.height > 0 else np.nan
})
# Perform ANCOVA
= gluc_data.to_pandas()
ancova_df "TRTP"] = pd.Categorical(ancova_df["TRTP"], categories=treatments)
ancova_df[= smf.ols("CHG ~ TRTP + BASE", data=ancova_df).fit()
model
# Calculate LS means and confidence intervals
= ancova_df["BASE"].mean()
base_mean = model.cov_params()
var_cov = []
ls_means
for i, trt in enumerate(treatments):
= np.array([1, int(i==1), int(i==2), base_mean])
x_pred = model.predict(pd.DataFrame({"TRTP": [trt], "BASE": [base_mean]}))[0]
ls_mean = np.sqrt(x_pred @ var_cov @ x_pred.T)
se_pred
ls_means.append({"Treatment": trt,
"LS_Mean": ls_mean,
"CI_Lower": ls_mean - 1.96 * se_pred,
"CI_Upper": ls_mean + 1.96 * se_pred
})
7.3 Create Tables for RTF Output
# Table 1: Descriptive Statistics
= [
tbl1_data
["Treatment"],
s[str(s["N_Baseline"]),
f"{s['Baseline_Mean']:.1f} ({s['Baseline_SD']:.2f})",
str(s["N_Week24"]),
f"{s['Week24_Mean']:.1f} ({s['Week24_SD']:.2f})",
str(s["N_Change"]),
f"{s['Change_Mean']:.1f} ({s['Change_SD']:.2f})",
f"{ls['LS_Mean']:.2f} ({ls['CI_Lower']:.2f}, {ls['CI_Upper']:.2f})"
]for s, ls in zip(desc_stats, ls_means)
]
= pl.DataFrame(tbl1_data, orient="row", schema=[
tbl1 "Treatment", "N_Base", "Mean_SD_Base", "N_Wk24", "Mean_SD_Wk24",
"N_Chg", "Mean_SD_Chg", "LS_Mean_CI"
])
# Table 2: Pairwise Comparisons
= []
tbl2_data for comp_name, trt_name in [("Xanomeline Low Dose - Placebo", "TRTP[T.Xanomeline Low Dose]"),
"Xanomeline High Dose - Placebo", "TRTP[T.Xanomeline High Dose]")] :
(= model.params[trt_name]
coef = model.bse[trt_name]
se = coef / se
t_stat = 2 * (1 - scipy_stats.t.cdf(abs(t_stat), model.df_resid))
p_value
tbl2_data.append([
comp_name,f"{coef:.2f} ({coef - 1.96*se:.2f}, {coef + 1.96*se:.2f})",
f"{p_value:.3f}"
])
= pl.DataFrame(tbl2_data, orient="row", schema=["Comparison", "Diff_CI", "P_Value"]) tbl2
7.4 Create RTF Document
# Create RTF document with two sections
= rtf.RTFDocument(
doc_ancova =[tbl1, tbl2],
df=rtf.RTFTitle(text=[
rtf_title"ANCOVA of Change from Baseline Glucose (mmol/L) at Week 24", "LOCF",
"Efficacy Analysis Population"
]),=[
rtf_column_header=["", "Baseline", "Week 24", "Change from Baseline"],
[rtf.RTFColumnHeader(text=[3, 2, 2, 4], text_justification=["l", "c", "c", "c"]),
col_rel_width=["Treatment", "N", "Mean (SD)", "N", "Mean (SD)", "N",
rtf.RTFColumnHeader(text"Mean (SD)", "LS Mean (95% CI){^a}"],
=[3, 0.7, 1.3, 0.7, 1.3, 0.7, 1.3, 2],
col_rel_width=["l"] + ["c"] * 7, border_bottom="single")],
text_justification=["Pairwise Comparison", "Difference in LS Mean (95% CI){^a}", "p-Value"],
[rtf.RTFColumnHeader(text=[5, 4, 2], text_justification=["l", "c", "c"])]
col_rel_width
],=[
rtf_body=[3, 0.7, 1.3, 0.7, 1.3, 0.7, 1.3, 2],
rtf.RTFBody(col_rel_width=["l"] + ["c"] * 7),
text_justification=[5, 4, 2], text_justification=["l", "c", "c"])
rtf.RTFBody(col_rel_width
],=rtf.RTFFootnote(text=[
rtf_footnote"{^a}Based on an ANCOVA model after adjusting baseline value. LOCF approach is used to impute missing values.",
"ANCOVA = Analysis of Covariance, LOCF = Last Observation Carried Forward",
"CI = Confidence Interval, LS = Least Squares, SD = Standard Deviation"
]),=rtf.RTFSource(text=["Source: ADLBC dataset"])
rtf_source
)
"../rtf/tlf_efficacy_ancova.rtf") doc_ancova.write_rtf(