Appendix B — Code
The code in this appendix is intended to be for proof of work. As such, many of the file paths in the appendix are relative to “missing” directories or have been redacted for privacy reasons, and original, patient-level data files are not included here. Estimates from simulations and some other components that are not dependent on the original data files are included in the data
directory of the online repository.
B.1 Initial Code Files
B.1.1 Study Identification
B.1.1.1 thesis_code/identify_relevant_studies.R
library(dplyr)
library(magrittr)
library(purrr)
library(stringr)
<- "REDACTED"
studies_path
<- list.files(
studies_files
studies_path,recursive = 2
)
grep("sld", studies_files, value = TRUE)
grep("ct", studies_files, value = TRUE)
grep("lesion", studies_files, value = TRUE)
grep("recist", studies_files, value = TRUE)
grep("tr.xpt", studies_files, value = TRUE)
grep("rs.xpt", studies_files, value = TRUE)
grep("adrs.xpt", studies_files, value = TRUE)
grep("^.*/rs.xpt$", studies_files, value = TRUE)
<- "/Study_20250320143641493/GO29436_4_Unconverted"
testing_file_loc
list.files(
paste0(studies_path, testing_file_loc),
full.names = T
%>%
) grep(".xpt", ., value = T) %>%
set_names(., stringr::str_extract(., "(?<=\\/)[^\\/]+.xpt") ) %>%
::map(
purrr~{
::read_xpt(.x) %>%
haven::map_lgl(~any(stringr::str_detect(.x, "(STABLE|PROGRESSIVE|PARTIAL|COMPLETE)"))) %>%
purrrsum(na.rm = T)
}
)
#
# haven::read_xpt(
# paste0(studies_path, testing_file_loc, "/priortx.xpt")
# ) %>%
# # purrr::map_lgl(~any(stringr::str_detect(.x, "(STABLE|PROGRESSIVE|PARTIAL|COMPLETE)")))
# View()
# Explore tr.xpt files ---------------------------------------
<- list.files(
tr_files paste0(studies_path),
full.names = T,
recursive = T
%>%
) grep("tr.xpt", ., value = T) %>%
::set_names()
purrr
<- tr_files %>%
all_tr_files map(
~{
::read_xpt(.x)
haven
}
)
# TREVAL and TREVALID seem to be interesting vars to look at
<- all_tr_files %>%
treval_trevalid_studies map(
~{
<- names(.x)
cols_names tibble(
TREVAL = any(str_detect(cols_names, "TREVAL")),
TREVALID = any(str_detect(cols_names, "TREVALID"))
)
}%>%
) list_rbind(names_to = "id") %>%
mutate(id = str_extract(id, "Study_.*")) %>%
arrange(desc(TREVALID))
<- treval_trevalid_studies %>%
treval_trevalid_studies_data ::filter(TREVALID == T) %>%
dplyrpull(id) %>%
paste0(studies_path, "/", .) %>%
set_names() %>%
map(
~haven::read_xpt(.x)
)
# View(treval_trevalid_studies_data[[1]])
# EXPLORE RS FILES ----------------------------------------------------------
# read in all of the available rs domain files
<- list.files(
rs_file_paths
studies_path,full.names = T,
recursive = T
%>%
) grep("^.*/rs.xpt$", ., value = T) %>%
set_names()
<- rs_file_paths %>%
all_rs_files map(
~haven::read_xpt(.x)
)
# identify studies where it appears there are multiple raters
# i.e. filter for studies where multiple "things" can be found in either
# the RSEVAL or RSEVALID variables
# 5
# View(all_rs_files[[6]])
<- all_rs_files %>%
rseval_and_rsevalid_info map(
~{
<- if ("RSEVAL" %in% names(.x)) select(.x, RSEVAL) %>% pull() %>% unique()
rseval_vals <- if ("RSEVALID" %in% names(.x)) select(.x, RSEVALID) %>% pull() %>% unique()
rsevalid_vals return(list(rseval = rseval_vals, rsevalid = rsevalid_vals))
}
)
<- rseval_and_rsevalid_info %>%
rs_file_paths_with_multiple_raters map_lgl(
~{
length(.x[[1]]) > 1 | length(.x[[2]]) > 1
}%>%
) which() %>%
names()
# read in the studies identified to have multiple SLD image assessors
<- rs_file_paths_with_multiple_raters %>%
rs_files_with_multiple_raters map(
~{
::read_xpt(.x)
haven
}
)
# View(rs_files_with_multiple_raters[[2]])
# count the number of unique participants
%>%
rs_files_with_multiple_raters map_dbl(~{pull(.x, matches("USUBJID|UNI_ID")) %>% unique() %>% length()})
# count the number of unique scan or visit dates per participant
%>%
rs_files_with_multiple_raters map_dbl(
~{
<- sym(names(select(.x,matches("USUBJID|UNI_ID"))))
group_var_sym %>%
.x group_by(!!group_var_sym, RSDY) %>%
summarize(count = n(), .groups = 'drop') %>%
nrow()
}
)
B.1.1.2 thesis_code/identify_studies_with_rs_data.R
library(purrr)
library(magrittr)
library(dplyr)
<-"REDACTED"
studies_path
<- list.files(
rs_file_paths
studies_path,full.names = T,
recursive = T
%>%
) grep("^.*/rs.xpt$", ., value = T)
%<>%
rs_file_paths ::set_names()
purrr
<- rs_file_paths %>%
all_rs_files map(
~haven::read_xpt(.x)
)
# Manually check/confirm presence of useful RSEVAL and RSEVALID in the files
# if (interactive()) {
# View(all_rs_files[[1]]) # confirmed no
# View(all_rs_files[[2]]) # only investigator listed for RSEVAL
# View(all_rs_files[[3]]) # only investigator listed for RSEVAL
# View(all_rs_files[[4]]) # only investigator listed for RSEVAL
# View(all_rs_files[[5]]) # accepted.
# View(all_rs_files[[6]]) # only investigator listed for RSEVAL
# View(all_rs_files[[7]]) # only investigator listed for RSEVAL
# View(all_rs_files[[8]]) # only investigator listed for RSEVAL
# View(all_rs_files[[9]]) # only investigator listed for RSEVAL
# View(all_rs_files[[10]]) # only investigator listed for RSEVAL
# View(all_rs_files[[11]]) # has investigator, oncologist 1, oncologist 2, and an adjudicator
# View(all_rs_files[[12]]) # has RADIOLOGIST 1 and 2, but only 60 records and no 3rd rater
# # `/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125523650/M16-289_17_Unconverted/rs.xpt`
# View(all_rs_files[[13]]) # accepted. has investigator and radiologist 1/2
# View(all_rs_files[[14]]) # only investigator listed for RSEVAL
# View(all_rs_files[[15]]) # accepted. has investigator and radiologist 1/2
# View(all_rs_files[[16]]) # only investigator listed for RSEVAL
# View(all_rs_files[[17]])
# }
# Programmatically complete the same process for identifying useful data
# identify studies where it appears there are multiple raters
# i.e. filter for studies where multiple "things" can be found in either
# the RSEVAL or RSEVALID variables
<- all_rs_files %>%
rseval_and_rsevalid_info map(
~{
<- if ("RSEVAL" %in% names(.x)) select(.x, RSEVAL) %>% pull() %>% unique() %>% setdiff("")
rseval_vals <- if ("RSEVALID" %in% names(.x)) select(.x, RSEVALID) %>% pull() %>% unique() %>% setdiff("")
rsevalid_vals return(list(rseval = rseval_vals, rsevalid = rsevalid_vals))
}
)
<- rseval_and_rsevalid_info %>%
rs_file_paths_with_multiple_raters map_lgl(
~{
length(.x[[1]]) > 1 | length(.x[[2]]) > 1
}%>%
) which() %>%
names() %>%
# specifically remove study 12 as it has a limited number of records
setdiff("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125523650/M16-289_17_Unconverted/rs.xpt") %>%
# remove another study as it does not actually use RECIST guidelines
setdiff("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125458298/EFC14335_3_Unconverted/rs.xpt") %>%
::set_names()
purrr
# read in the studies identified to have multiple SLD image assessors
<- rs_file_paths_with_multiple_raters %>%
rs_files_with_multiple_raters map(
~{
::read_xpt(.x)
haven
}
)
1]] %<>%
rs_files_with_multiple_raters[[mutate(RSEVALID = if_else(RSEVALID == "ONCOLOGIST", "SITE INVESTIGATOR", RSEVALID)) %>%
mutate(
RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
)
2]] %<>%
rs_files_with_multiple_raters[[mutate(RSEVALID = case_when(
== "RADIOLOGIST 1" ~ "READER 1",
RSEVALID == "RADIOLOGIST 2" ~ "READER 2",
RSEVALID TRUE ~ RSEVALID
)%>%
) mutate(
RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
%>%
) mutate(RSEVALID = if_else(RSEVALID == "INVESTIGATOR", "SITE INVESTIGATOR", RSEVALID))
3]] %<>%
rs_files_with_multiple_raters[[mutate(
RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
%>%
) mutate(RSEVALID = if_else(RSEVALID == "INVESTIGATOR", "SITE INVESTIGATOR", RSEVALID))
saveRDS(rs_files_with_multiple_raters, "rs_files_with_multiple_raters.rds")
B.1.1.3 thesis_code/identify_tr_sumdiam_data.R
#### CHECK TR FILES WHERE RS DATA ALREADY IDENTIFIED
library(stringr)
<- names(rs_file_paths_with_multiple_raters) %>%
tr_where_rs_files str_remove_all("/rs.xpt") %>%
paste0("/tr.xpt") %>%
map(
~haven::read_xpt(.x)
)
saveRDS(tr_where_rs_files, "tr_where_rs_files.rds")
tr_where_rs_files
# easy enough
1]] %>%
tr_where_rs_files[[# dplyr::filter(TRTESTCD == "SUMDIAM") %>%
arrange(USUBJID, TRDY, TREVAL, TREVALID) %>%
::filter(TRSTRESU == "mm") %>%
dplyrView()
# what is going on here??
2]] %>%
tr_where_rs_files[[# dplyr::filter(TRTESTCD =="SUMLDIAM") %>%
# dplyr::filter(TREVAL == "INVESTIGATOR") %>%
::filter(TRSTRESU == "mm") %>%
dplyrarrange(UNI_ID, TRDY, TREVAL, TREVALID) %>%
::filter(str_detect(UNI_ID, "08d7eb2980d3fcf841b2aaf41bc848faae5ea0c09429bd8408a35e5e0ecd8d2d")) %>%
dplyr::filter(str_detect(TRLNKID, "^RECIST 1.1.*|^$|^INV.*") | is.na(TRLNKID)) %>%
dplyr# count(TRTESTCD) %>%
View()
# easy enough
3]] %>%
tr_where_rs_files[[::filter(TRCAT == "RECIST 1.1") %>%
dplyr::filter(TRTESTCD == "SUMDIAM") %>%
dplyrarrange(USUBJID, TRDY, TREVAL, TREVALID) %>%
View()
### CHECK SUPPLEMENTAL FILES FOR TUMOR INFORMATION
# ADRS DOMAIN IF IT EXISTS
<- names(rs_file_paths_with_multiple_raters) %>%
adrs_data str_remove_all("/rs.xpt") %>%
paste0("/adrs.xpt") %>%
map(
~try(haven::read_xpt(.x))
)
2]] %>%
adrs_data[[# dplyr::filter(PARAM %in% c("Sum of Longest Diameters by Visit", "Sum of Longest Diameters by Visit by IRF")) %>%
arrange(UNI_ID, AVISITN) %>%
View()
# TU DOMAIN IF IT EXISTS
# note that this domain does not have measurements of tumors
# but it does seem to clearly identify new, target, and non-target lesions
# tu_data <- names(rs_file_paths_with_multiple_raters) %>%
# str_remove_all("/rs.xpt") %>%
# paste0("/tu.xpt") %>%
# map(
# ~try(haven::read_xpt(.x))
# )
#
#
# tu_data[[3]] %>% View()
# SUPPTR had no extra info of value
# supptr_data <- names(rs_file_paths_with_multiple_raters) %>%
# str_remove_all("/rs.xpt") %>%
# paste0("/supptr.xpt") %>%
# map(
# ~try(haven::read_xpt(.x))
# )
<- names(rs_file_paths_with_multiple_raters) %>%
all_xpts_one str_remove_all("/rs.xpt") %>%
1] %>%
.[list.files(full.names = T) %>%
grep(".xpt", ., value = T) %>%
set_names() %>%
map(
~haven::read_xpt(.x)
)
::keep(all_xpts_one,
purrr~{
%>%
.x ::map_lgl(~any(stringr::str_detect(.x, "mm"))) %>%
purrrsum(na.rm = T) %>%
> 0}
{.
} )
B.1.2 Data Preparation
B.1.2.1 thesis_code/clean_rs_domain_data.R
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
<- list()
rs_data_cleaned_wide
$NCT02395172$first <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
::filter(
dplyr== "OVRLRESP",
RSTESTCD != "" & !is.na(RSSTRESC),
RSSTRESC !RSEVALID %in% c(""),
!is.na(RSDY) & RSDY > 0
%>%
) select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
group_by(USUBJID) %>% dplyr::filter(RSDY == min(RSDY)) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT02395172$last <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
::filter(
dplyr== "OVRLRESP",
RSTESTCD != "" & !is.na(RSSTRESC),
RSSTRESC !RSEVALID %in% c(""),
!is.na(RSDY) & RSDY > 0
%>%
) select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
group_by(USUBJID) %>% dplyr::filter(RSDY == max(RSDY)) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT02395172$overall <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
::filter(
dplyr== "OVRLRESP",
RSTESTCD != "" & !is.na(RSSTRESC),
RSSTRESC !RSEVALID %in% c(""),
!is.na(RSDY) & RSDY > 0
%>%
) select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
# Study_20250320125552850
$NCT03434379$first <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(UNI_TRNC, RSDY, RSTEST) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
group_by(UNI_TRNC) %>% dplyr::filter(RSDY == min(RSDY)) %>%
::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT03434379$last <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(UNI_TRNC, RSDY, RSTEST) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) arrange(UNI_TRNC, RSDY) %>%
select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
group_by(UNI_TRNC) %>% dplyr::filter(RSDY == max(RSDY)) %>%
::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT03434379$overall <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(UNI_TRNC, RSDY, RSTEST) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) arrange(UNI_TRNC, RSDY) %>%
select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
# this study has a define.xml file I could read-in to parse for more info
$NCT03631706$first <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "OVRLRESP",
RSTESTCD == "RECIST 1.1"
RSCAT %>%
) # one instance of a duplicate--remove with distinct
distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
group_by(USUBJID) %>% dplyr::filter(RSDY == min(RSDY)) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT03631706$last <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "OVRLRESP",
RSTESTCD == "RECIST 1.1"
RSCAT %>%
) # one instance of a duplicate--remove with distinct
distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
group_by(USUBJID) %>% dplyr::filter(RSDY == max(RSDY)) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
$NCT03631706$overall <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
rs_data_cleaned_widearrange(USUBJID, RSDY) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "OVRLRESP",
RSTESTCD == "RECIST 1.1"
RSCAT %>%
) # one instance of a duplicate--remove with distinct
distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)
tidyr
B.1.2.2 thesis_code/clean_tr_domain_data.R
<- function(x) {
get_mode # If the vector has only one unique value, return it directly
if (length(unique(na.omit(x))) == 1) {
return(unique(na.omit(x)))
}
# Otherwise, calculate the mode
<- unique(x)
uniq_vals which.max(tabulate(match(x, uniq_vals)))]
uniq_vals[
}
# tr_where_rs_files[[1]] %>%
# dplyr::filter(TRTESTCD == "SUMDIAM") %>%
# mutate(
# TRLNKGRP_sub = str_remove(TRLNKGRP, "_E[:digit:]") ,
# .after = TRLNKGRP
# ) %>%
# group_by(USUBJID, TRLNKGRP_sub) %>%
# mutate(
# TRDY_sub = get_mode(TRDY)
# ) %>%
# ungroup() %>%
# dplyr::filter(!is.na(TRDY_sub)) %>%
# mutate(
# TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
# ) %>%
# group_by(USUBJID, TREVALID) %>%
# dplyr::filter(!is.na(TRSTRESN)) %>%
# mutate(
# baseline_value = first(TRSTRESN),
# mm_change_from_baseline = TRSTRESN - first(TRSTRESN),
# mm_change_from_previous = TRSTRESN - lag(TRSTRESN),
# nadir_absolute_value = min(TRSTRESN),
# nadir_cummin_value = cummin(TRSTRESN)
# ) %>%
# select(USUBJID, TRLNKGRP, TRLNKGRP_sub, TRORRES, TRSTRESN, TREVAL, TREVALID, TRDY_sub, baseline_value, matches("mm_change"), matches("nadir")) %>%
# mutate(
# percent_response = if_else(
# !is.na(mm_change_from_previous),
# (TRSTRESN - lag(nadir_cummin_value))/lag(nadir_cummin_value),
# NA_real_
# )
# ) %>%
# mutate(
# baseline_response = ((TRSTRESN-baseline_value)/baseline_value),
# progressive_disease = (percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5),
# partial_response = (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease,
# complete_response = TRSTRESN == 0
# ) %>%
# mutate(
# recist_response = case_when(
# TRSTRESN == 0 ~ "CR",
# ((percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5)) ~ "PD",
# (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease ~ "PR",
# is.na(mm_change_from_previous) ~ NA_character_,
# TRUE ~ "SD"
# )
# ) %>%
# group_by(USUBJID) %>%
# dplyr::filter(any(TRSTRESN == 0)) %>%
# arrange(USUBJID, TREVALID) %>%
# View()
<- tr_where_rs_files[[1]] %>%
tr_study_data_one_target_tumors ::filter(TRTESTCD == "SUMDIAM") %>%
dplyrmutate(
TRLNKGRP_sub = str_remove(TRLNKGRP, "_E[:digit:]") ,
.after = TRLNKGRP
%>%
) group_by(USUBJID, TRLNKGRP_sub) %>%
mutate(
TRDY_sub = get_mode(TRDY)
%>%
) ungroup() %>%
::filter(!is.na(TRDY_sub)) %>%
dplyrmutate(
TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
%>%
) group_by(USUBJID, TREVALID) %>%
::filter(!is.na(TRSTRESN)) %>%
dplyrmutate(
baseline_value = first(TRSTRESN),
mm_change_from_baseline = TRSTRESN - first(TRSTRESN),
mm_change_from_previous = TRSTRESN - lag(TRSTRESN),
nadir_absolute_value = min(TRSTRESN),
nadir_cummin_value = cummin(TRSTRESN)
%>%
) select(USUBJID, TRLNKID, TRLNKGRP, TRLNKGRP_sub, TRTESTCD, TRTEST, TRORRES, TRSTRESN, TREVAL, TREVALID, TRDY_sub, baseline_value, matches("mm_change"), matches("nadir")) %>%
mutate(
percent_response = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(TRSTRESN NA_real_
)
)
<- tr_where_rs_files[[1]] %>%
tr_study_data_one_nontarget_new_tumors ::mutate(TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)) %>%
dplyr::filter(
dplyr== "TUMSTATE",
TRTESTCD # str_detect(TRLNKID, "^NT.*|^NEW.*"),
!is.na(TRDY), TRDY > 0,
# TREVALID == "INVESTIGATOR"
%>%
) group_by(USUBJID, TRDY, TREVALID) %>%
summarize(
new_lesion =
case_when(
any(TREVALID == "INVESTIGATOR" & str_detect(TRLNKID, "^NEW.*")) ~ "Yes",
any(str_detect(TREVALID, "READER") & str_detect(TRLNKID, "^NEW.*") & str_detect(TRSTRESC, "^UNEQUIVOCAL PROGRESSION$|^UNEQUIVOCAL, PREVIOUSLY EQUIVOCAL$|^UNEQUIVOCAL, STILL PRESENT$|^NEW UNEQUIVOCAL$")) ~ "Yes",
TRUE ~ "No"
),non_target_lesion_status =
case_when(
any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^DISAPPEARED$|^ABSENT$")) ~ "CR",
any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^UNEQUIVOCAL PROGRESSION$")) ~ "PD",
any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^STABLE$|^PROGRESSED$|^PRESENT$|^NON-PATHOLOGICAL NODE.*$|^DECREASED$")) ~ "Non-CR/Non-PD",
any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^NOT EVALUABLE$")) ~ "NOT EVALUABLE",
any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^NOT ASSESSABLE$")) ~ "NOT ASSESSABLE",
any(str_detect(TRLNKID, "^NT.*")) ~ "NO STATUS",
TRUE ~ NA_character_
)%>%
) ungroup()
%>%
tr_study_data_one_target_tumors mutate(
baseline_response = ((TRSTRESN-baseline_value)/baseline_value),
progressive_disease = (percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5),
partial_response = (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease,
complete_response = TRSTRESN == 0
%>%
) mutate(
recist_response = case_when(
== 0 ~ "CR",
TRSTRESN > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5)) ~ "PD",
((percent_response -baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease ~ "PR",
(((TRSTRESNis.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "SD"
)%>%
) left_join(
tr_study_data_one_nontarget_new_tumors,by = c("USUBJID" = "USUBJID", "TRDY_sub" = "TRDY", "TREVALID" = "TREVALID")
%>%
) mutate(
new_lesion = if_else(is.na(new_lesion), "No", new_lesion)
%>%
) mutate(
overall_response = case_when(
== "PD" ~ "PD",
recist_response == "Yes" ~ "PD",
new_lesion == "No" & recist_response == "SD" & (non_target_lesion_status %in% c("CR", "Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "SD",
new_lesion == "No" & recist_response == "PR" & (non_target_lesion_status %in% c("CR", "Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "PR",
new_lesion == "No" & recist_response == "CR" & (non_target_lesion_status %in% c("Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "PR",
new_lesion == "No" & recist_response == "CR" & (non_target_lesion_status %in% c("CR") | is.na(non_target_lesion_status)) ~ "CR",
new_lesion
)
)
B.2 IRR Analyses
B.2.0.1 thesis_code/irr_analyses/calculate_irr.R
source("clean_rs_domain_data.R")
library(purrr)
# Fleiss Kappa for studies where it's possible
%>%
rs_data_cleaned_wide map(
~{
map(.x, ~{.x %>%
select(-1) %>%
::kappam.fleiss(., detail = T)}
irr
)
}%>%
) map(~map(.x, ~.x$value))
library(boot)
library(irr)
<- function(data, indices) {
fleiss_kappa_func # Resample the data
<- data[indices, ]
sample_data # Calculate Fleiss' Kappa for the resampled data
<- kappam.fleiss(sample_data)$value
kappa return(kappa)
}
# Bootstrapping
<- map(rs_data_cleaned_wide, ~{map(.x,
boostrapped_fleiss_kappas ~{
set.seed(123) # For reproducibility
<- boot(data = select(.x,-1), statistic = fleiss_kappa_func, R = 100)
results
results
}
)
}
)
# str(boostrapped_fleiss_kappas[[1]]$first)
#
# boostrapped_fleiss_kappas[[1]]$first$data %>%
# boostrapped_fleiss_kappas[[1]]$first$statistic()
#
#
# sd(boostrapped_fleiss_kappas[[1]]$first$t)
<- map(boostrapped_fleiss_kappas, ~{
fleiss_kappa_estimates_ses map(.x, ~{
<- .x$statistic(.x$data)
Estimate <- sd(.x$t)
SE return(tibble(Estimate = Estimate, SE = SE))
})
})
B.2.0.2 thesis_code/irr_analyses/calculate_pairwise_rater_irr.R
# cohens_kappa_example <- rs_data_cleaned_wide$NCT02395172$overall %>%
# select(2,3) %>%
# irr::kappa2()
library(boot)
<- function(data, indices) {
cohens_kappa_func # Resample the data
<- data[indices, ]
sample_data # Calculate Fleiss' Kappa for the resampled data
<- irr::kappa2(sample_data)$value
kappa return(kappa)
}
# Create pairwise data sets for the raters
<- list()
cohens_kappa_data
$reader1_reader2 <- rs_data_cleaned_wide %>%
cohens_kappa_datamap(~map(.x, ~select(.x, -1) %>% select(matches("READER|RADIOLOGIST"))))
$investigator_reader2 <- rs_data_cleaned_wide %>%
cohens_kappa_datamap(~map(.x, ~select(.x, -1)
%>% select(matches("(INVESTIGATOR|ONCOLOGIST)"), matches("(READER 1|RADIOLOGIST 1)"))
)
)
$investigator_reader1 <- rs_data_cleaned_wide %>%
cohens_kappa_datamap(~map(.x, ~select(.x, -1)
%>% select(matches("(INVESTIGATOR|ONCOLOGIST)"), matches("(READER 2|RADIOLOGIST 2)"))
)
)
# Calculate Cohen's kappa for all pairwise ratings
<- list()
cohens_kappa_results
$reader1_reader2 <- map(
cohens_kappa_results$reader1_reader2,
cohens_kappa_data~{map(.x,
~{
set.seed(123) # For reproducibility
<- boot(data = .x, statistic = cohens_kappa_func, R = 10)
results
results
}
)
}
)
$investigator_reader2 <- map(
cohens_kappa_results$investigator_reader2,
cohens_kappa_data~{map(.x,
~{
set.seed(123) # For reproducibility
<- boot(data = .x, statistic = cohens_kappa_func, R = 10)
results
results
}
)
}
)
$investigator_reader1 <- map(
cohens_kappa_results$investigator_reader1,
cohens_kappa_data~{map(.x,
~{
set.seed(123) # For reproducibility
<- boot(data = .x, statistic = cohens_kappa_func, R = 10)
results
results
}
)
}
)
# Create cross-tabulations for all data frames
# Calculate Cohen's kappa for all pairwise ratings
<- list()
pairwise_tabulations
$reader1_reader2 <- map(
pairwise_tabulations$reader1_reader2,
cohens_kappa_data~{map(.x,
~{xtabs(data = .x)}
)
}
)
$investigator_reader2 <- map(
pairwise_tabulations$investigator_reader2,
cohens_kappa_data~{map(.x,
~{xtabs(data = .x)}
)
}
)
$investigator_reader1 <- map(
pairwise_tabulations$investigator_reader1,
cohens_kappa_data~{map(.x,
~{xtabs(data = .x)}
)
}
)
$investigator_reader1$NCT02395172$overall %>%
cohens_kappa_data::kappa2(.)
irr
<- map(cohens_kappa_data, ~{
pairwise_cohens map(.x, ~{
# browser()
<- .x[["overall"]] %>%
temp mutate(across(everything(), ~factor(.x, levels = c('NON-CR/NON-PD', 'CR', 'PD', 'PR', 'NE', 'SD')))) %>%
table() %>%
::cohen.kappa()
psych<- paste0(round(temp$kappa, 3), " [" , round(temp$confid[1],3), ", ", round(temp$confid[5],3), "], n=", temp$n.obs)
out print(out)
out
})
})
# temp_result <- cohens_kappa_data$investigator_reader2$NCT03434379$overall %>%
# irr::kappa2()
#
#
# temp_result <- cohens_kappa_data$investigator_reader2$NCT03434379$overall %>%
# mutate(across(everything(), ~factor(.x, levels = c('NON-CR/NON-PD', 'CR', 'PD', 'PR', 'NE', 'SD')))) %>%
# table() %>%
# psych::cohen.kappa()
# pairwise_cohens %>%
# bind_rows(.id = "Pairs") %>%
# mutate(Average = (NCT02395172 + NCT03434379 + NCT03631706)/3,
# Pairs = case_when(
# Pairs == "reader1_reader2" ~ "Central Reviewer 1 vs. 2",
# Pairs == "investigator_reader1" ~ "Site Investigator vs. Central Reviewer 1",
# Pairs == "investigator_reader2" ~ "Site Investigator vs. Central Reviewer 2"
# )
# ) %>%
# gt::gt() %>%
# gt::tab_header(title = glue::glue("Pairwise Cohen's Kappa values for Clinical Trials")) %>%
# gt::tab_footnote(
# footnote = "Surprisingly, the average agreement for the central reviewers is lower than the agreement between site investigators and central reviwers. However, there is quite a bit of variability within studies, as the central reviewers of NCT02395172 exhibted substantially lower overall agreement than the site investigators vs. central review."
# )
saveRDS(pairwise_cohens, "transfer/pairwise_cohens_summarized.rds")
B.2.0.3 thesis_code/irr_analyses/cochrane_diagram.R
<- list()
lit_search $file_path <- "irr_analyses/Metaanalysis Literature Search.xlsx"
lit_search
$sheet_names <- readxl::excel_sheets(
lit_search$file_path
lit_search
)
$data <- map(
lit_searchset_names(lit_search$sheet_names),
~readxl::read_excel(lit_search$file_path, sheet = .x)
)
$exclusion_strings$compare_to_or_using_other_frameworks_string <- c(
lit_search"C omparing RECIST to another framework",
"Comparing CT and PET scan concordance",
"Comparing RECIST 1.1 with RECIST 1.0",
"Comparing RECIST to Choi and Tumor Attenuation criteria",
"Comparing RECIST to EURAMOS",
"Comparing RECIST to another framewoek",
"Comparing RECIST to other frameworks",
"Comparing RECIST to volumetric approach",
"Comparing RECIST with EORTC and PERCIST",
"Comparing RECIST with PNS",
"Comparing RECIST with histological findings",
"Comparing RECIST with iRECIST",
"Comparing RECIST with iRECIST and mRECIST",
"Comparing RECIST with other metrics",
"Comparing RECIST with other modalities",
"Comparing RECIST with serum 5-hiaa levels",
"Comparing RECIST with volumetric measure",
"Comparing imaging modalities",
"Comparing other metrics",
"Comparing radiography combined with other modalities",
"Evaluating RECIST vs. Choi criteria",
"evaluating reliability of computer-aided approach"
)
$exclusion_strings$relevant_but_no_irr <- c(
lit_search"Did not compare the raters on needed outcome",
"Comparing agreement over time?",
"Did not do an IRR",
"No interobserver evaluation done",
"Not comparing raters",
"Would be possible based on study structure, but data/information to calculate not provided"
)
$exclusion_strings$measured_sld_variability_but_not_recist_outcome <- c(
lit_search"Evaluating measurement differences, but not outcome differences",
"Evaluating measuring technique, not ordinal outcome of RECIST",
"Has an ICC analysis of tumors, but does not look at RECIST ordinal outcomes",
"Looked at measurements but not outcomes even though interrater analyses were made",
"No assessment of ordinal outcome",
"Not looking at the ordinal outcomes of RECIST",
"Only evaluation of raters was with ICC"
)
$exclusion_strings$non_human_study <- c(
lit_search"Evaluation in canine",
"Canine study",
"Is about dogs?"
)
$exclusion_strings$not_evaluating_recist <- c(
lit_search"Doesn't seem to be evaluating RECIST",
"Evaluates mRECIST, not RECIST 1.1",
"Evaluating PERCIST",
"Evaluating mRECIST",
"Evaluating a model they built with train/test data based on mRECIST criteria",
"Measuring volume",
"not looking at RECIST",
"Not assessing RECIST outcomes and was measuring ICC only",
"Not evaluating RECIST",
"Not evaluating RECIST 1.1",
"Not evaluating reviewers on RECIST",
"Not measuring ordinal outcomes and using non-standard imaging technique for tumors",
"Not using RECIST, but rather several derivatives",
"Used mRECIST and no Cohen/Fleiss kappa",
"Uses mRECIST"
)
$exclusion_strings$irrelevant_topic <- c(
lit_search"Irrelevant topic",
"No relevant analysis",
"Non relevant analyses",
"Not evaluating anything relevant to me",
"Not relevant",
"Not relevant at all",
"Not relevant to RECIST",
"Not eligible"
)
$exclusion_strings$paywalled_article <- c(
lit_search"No access; paywall",
"Paywalled article",
"Can't access article to evaluate further, and probably can't convert Light's kappa to Fleiss'"
)
$exclusion_strings$wrong_irr_stat_or_cis_not_given <- c(
lit_search"Probably can't use this statistic",
"Cis not available, but there is a table of the 3 observers' response classifications so I think I could reverse engineer this to at least create an estimated bootstrap CI. Need to create permuted data sets until there are several options that converge to k=0.7558, and then bootstrap these for CIs, and then average the CIs",
"Not enough info provided (SE, data) to use this",
"Cites another study with relevant data,by Liu…Wang"
)
$data[1:3] %>%
lit_searchbind_rows(.id = "Worksheet") %>%
arrange(Relevant, Notes) %>%
mutate(
prisma_status = case_when(
== "Duplicate" ~ "Duplicate",
Relevant %in% lit_search$exclusion_strings$compare_to_or_using_other_frameworks_string ~ "Comparing RECIST to other frameworks or metrics",
Notes %in% lit_search$exclusion_strings$relevant_but_no_irr ~ "No Interrater Reliability Analysis",
Notes %in% lit_search$exclusion_strings$measured_sld_variability_but_not_recist_outcome ~ "Interrater Reliability Analysis of non-RECIST scale",
Notes %in% lit_search$exclusion_strings$non_human_study ~ "Non-human Study",
Notes %in% lit_search$exclusion_strings$not_evaluating_recist ~ "Not evaluing RECIST",
Notes %in% lit_search$exclusion_strings$irrelevant_topic ~ "Irrelevant topic",
Notes %in% lit_search$exclusion_strings$paywalled_article ~ "Paywalled article",
Notes %in% lit_search$exclusion_strings$wrong_irr_stat_or_cis_not_given ~ "Insufficient statistics provided",
Notes == "Yes" & !is.na(Statistic) & stringr::str_detect(Statistic, "Fleiss") ~ "Fleiss' Kappa",
Relevant == "Yes" & !is.na(Statistic) & stringr::str_detect(Statistic, "Cohen") ~ "Cohen's Kappa"
Relevant
)%>%
) mutate(Worksheet = if_else(Worksheet %in% c("PubMed1", "PubMed2"), "PubMed", Worksheet)) %>%
count(Worksheet, prisma_status) %>%
View()
# setdiff(
# pull(lit_search$data[[4]], PMID),
# pull(lit_search$data[5:6] %>% bind_rows(), PMID)
# )
library(DiagrammeR)
grViz("
digraph prisma {
graph [layout = dot, rankdir = TB]
node [shape = box, style = filled, fillcolor = lightgrey, fontname = Helvetica]
A [label = 'Records identified through database searching\\n(n = 131)']
B [label = 'Records after duplicates removed\\n(n = 115)']
C [label = 'Records screened\\n(n = 115)']
D [label = 'Records excluded\\n(n = 22)']
DD [label = 'Irrelevant Topic\\n(n = 15)']
DDD [label = 'Paywalled article\\n(n = 4)']
DDDD [label = 'Non-humann study\\n(n = 3)']
E [label = 'Full-text articles assessed for eligibility\\n(n = 93)']
F [label = 'Full-text articles excluded\\n(n = 82)']
G [label = 'Studies included in\nquantitative synthesis (meta-analysis)\\n(n = 11)']
H [label = 'Cohens Kappa\\n(n = 5)']
I [label = 'Fleiss Kappa\\n(n = 6)']
FF [label = 'Comparing RECIST to other\nframeworks or metrics\\n(n = 36)']
FFF [label = 'Not evaluing\nRECIST\\n(n = 25)']
FFFF [label = 'No Interrater\nReliability Analysis\\n(n = 7)']
FFFFF [label = 'Insufficient\nstatistics\\n(n = 4)']
A -> B -> C
C -> D
D -> DD
D -> DDD
D -> DDDD
C -> E
E -> F
E -> G
G -> H
G -> I
F -> FF
F -> FFF
F -> FFFF
F -> FFFFF
}
")
B.2.0.4 thesis_code/irr_analyses/irr_exploration.R
# Categorical data: Cohen's kappa for 2 raters, and Fleiss' kappa for >2 raters
# Continuous data: intra-class correlation (ICC) as this calculation includes a variance component
library(irr)
# Example data: 10 items assessed by 3 raters
# Let's assume the 4 possible outcomes are coded as 1, 2, 3, and 4
<- matrix(c(0, 0, 3, 0, # All raters chose category 3 for the first item
ratings 0, 2, 1, 0, # Two raters chose category 2, one chose category 3 for the second item
0, 0, 0, 3, # All raters chose category 4 for the third item
1, 1, 1, 0, # Each category was chosen once for the fourth item
2, 1, 0, 0, # ... and so on for the rest of the items
0, 0, 2, 1,
0, 3, 0, 0,
3, 0, 0, 0,
1, 0, 0, 2,
0, 1, 0, 2),
nrow = 10, byrow = TRUE,
dimnames = list(NULL, c("Category1", "Category2", "Category3", "Category4")))
# Calculate Fleiss' Kappa for inter-rater reliability
<- kappam.fleiss(as.table(ratings))
kappa_results
kappa_results
library(boot)
<- function(data, indices) {
fleiss_kappa_func # Resample the data
<- data[indices, ]
sample_data # Calculate Fleiss' Kappa for the resampled data
<- kappam.fleiss(sample_data)$value
kappa return(kappa)
}
# Bootstrapping
set.seed(123) # For reproducibility
<- boot(data = ratings, statistic = fleiss_kappa_func, R = 10000))
(results
<- sd(results$t)
se_kappa
B.2.0.5 thesis_code/irr_analyses/irr_kappa_meta_analysis.R
library(magrittr)
library(dplyr)
library(metafor)
library(bayesmeta)
# NEED TO IMPORT THE DATA FILE WITH THE KAPPA VALUES
source("./irr_analyses/calculate_irr.R")
<- list()
meta_analysis_data
$fleiss_literature <- tibble(
meta_analysis_dataStat = "Fleiss",
Study = c('PMID29633000','PMID30398433','PMID33012693','PMID33401109','PMID25488429','PMID39658684'),
Author = c("Tovoli et al.", "Kuhl et al.", "Ghosn et al.", "Zimmermann et al.", "Oubel et al.", "El Homsi et al."),
Estimate = c(0.741, 0.72, 0.77, 0.857, 0.72, 0.68),
SE = c(0.054, 0.01785, 0.0969, 0.0768, 0.088, 0.0357)
)
$cohen_literature <- tibble(
meta_analysis_dataStat = "Cohen",
Study = c('PMID26982677','PMID27083205','PMID29934024','PMID29657622'),
Author = c("Aghighi et al.", "Felsch et al.", "Karmakar et al.", "Ghobrial et al."),
Estimate = c(0.82, 0.53, 0.51, 0.793),
SE = c(0.0357, 0.0357, 0.0847, 0.1122)
)
$trials_overall <- tibble(
meta_analysis_dataStat = "Fleiss",
Study = names(fleiss_kappa_estimates_ses),
Author = names(fleiss_kappa_estimates_ses)
%>%
) bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["overall"]]) )
# meta_analysis_data$trials_first <- tibble(
# Stat = "Fleiss",
# Study = names(fleiss_kappa_estimates_ses)
# ) %>%
# bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["first"]]) )
#
# meta_analysis_data$trials_last <- tibble(
# Stat = "Fleiss",
# Study = names(fleiss_kappa_estimates_ses)
# ) %>%
# bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["last"]]) )
# Step 1: Define the logit and inverse logit transformations for kappa
<- function(kappa) log((kappa + 1) / (1 - kappa))
logit_kappa <- function(x) (exp(x) - 1) / (exp(x) + 1)
inv_logit_kappa
# Step 2: Apply the logit transformation and delta method for SE
<- meta_analysis_data %>%
meta_analysis_data_all bind_rows() %>%
mutate(
logit_kappa = logit_kappa(Estimate),
logit_se = SE * 2 / ((1 - Estimate^2) + 1e-8) # delta method; small constant to ensure no division by zero
)
###### OVERALL META ANALYSIS OPTIONS
## FREQUENTIST
# Conduct random-effects meta-analysis
<- rma(
rma_overall_freq yi = logit_kappa,
sei = logit_se,
data = arrange(meta_analysis_data_all, Estimate),
method = "REML",
slab = Author
)# Print summary of results
summary(rma_overall_freq, sei=logi_se)
# Create forest plot
forest(
rma_overall_freq,atransf = inv_logit_kappa,
main = "Overall Meta-Analysis",
xlab = "Observed IRR",
addpred = TRUE
)funnel(
rma_overall_freq,atransf = inv_logit_kappa,
main = "Funnel Plot of Overall Meta-Analysis",
xlab = "Observed IRR",
addtau2 = T,
label = T,
legend = F
)
<- regtest(rma_overall_freq)
reg = seq(0,1,length = 100)
se lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")
## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log <- bayesmeta(y = log(meta_analysis_data_all[["Estimate"]]),
# sigma = meta_analysis_data_all[["SE"]],
# label = meta_analysis_data_all[["Study"]],
# mu.prior.mean = rma_overall_freq$b, mu.prior.sd= rma_overall_freq$se,
# tau.prior= "I2")
#
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log, exponentiate = T)
# forest(bayes_meta_analysis_recist_log, atransf = exp)
# funnel(bayes_meta_analysis_recist_log)
########### FLEISS KAPPA ONLY
<- meta_analysis_data_all %>%
meta_analysis_data_fleiss ::filter(Stat == "Fleiss")
dplyr
## FREQUENTIST
# Conduct random-effects meta-analysis
<- rma(
rma_fleiss_freq yi = logit_kappa,
sei = logit_se,
data = arrange(meta_analysis_data_fleiss, Estimate),
method = "REML",
slab = Author
)
# Print summary of results
summary(rma_fleiss_freq)
forest(
rma_fleiss_freq,atransf = inv_logit_kappa,
main = "Fleiss' Kappa Meta-Analysis",
xlab = "Observed IRR",
addpred = TRUE
)funnel(
rma_fleiss_freq,atransf = inv_logit_kappa,
main = "Funnel Plot of Fleiss' Kappa Meta-Analysis",
xlab = "Observed IRR",
addtau2 = T,
label = T,
legend = F
)
<- regtest(rma_fleiss_freq)
reg = seq(0,1,length = 100)
se lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")
## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log_fleiss <- bayesmeta(y = log(meta_analysis_data_fleiss[["Estimate"]]),
# sigma = meta_analysis_data_fleiss[["SE"]],
# label = meta_analysis_data_fleiss[["Study"]],
# mu.prior.mean = rma_fleiss_freq$b, mu.prior.sd = rma_fleiss_freq$se,
# tau.prior= "I2")
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log_fleiss, exponentiate = T)
# forest(bayes_meta_analysis_recist_log_fleiss, atransf = exp)
# funnel(bayes_meta_analysis_recist_log_fleiss)
########### COHENS KAPPA ONLY
<- meta_analysis_data_all %>%
meta_analysis_data_cohen ::filter(Stat == "Cohen")
dplyr
# Conduct random-effects meta-analysis
<- rma(
rma_cohen_freq yi = logit_kappa,
sei = logit_se,
data = arrange(meta_analysis_data_cohen, Estimate),
method = "REML",
slab = Author
)
# Print summary of results
summary(rma_cohen_freq)
forest(
rma_cohen_freq,atransf = inv_logit_kappa,
main = "Cohen's Kappa Meta-Analysis",
xlab = "Observed IRR",
addpred = TRUE
)funnel(
rma_cohen_freq,atransf = inv_logit_kappa,
main = "Funnel Plot of Cohen's Kappa Meta-Analysis",
xlab = "Observed IRR",
addtau2 = T,
label = T,
legend = F
)
<- regtest(rma_cohen_freq)
reg = seq(0,1,length = 100)
se lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")
## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log_cohen <- bayesmeta(y = log(meta_analysis_data_cohen[["Estimate"]]),
# sigma = meta_analysis_data_cohen[["SE"]],
# label = meta_analysis_data_cohen[["Study"]],
# mu.prior.mean = rma_cohen_freq$b, mu.prior.sd = rma_cohen_freq$se,
# tau.prior = "I2")
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log_cohen, exponentiate = T)
# forest(bayes_meta_analysis_recist_log_cohen, atransf = exp)
# funnel(bayes_meta_analysis_recist_log_cohen)
B.3 Overall Agreement Analyses
B.3.0.1 thesis_code/chisq_overall/chisq_overall.R
<- list()
chisq_overall_data
if (!"rs_files_with_multiple_raters" %in% ls()){
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
}
$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
chisq_overall_dataarrange(USUBJID, RSDY) %>%
::filter(
dplyr== "OVRLRESP",
RSTESTCD != "" & !is.na(RSSTRESC),
RSSTRESC !RSEVALID %in% c(""),
!is.na(RSDY) & RSDY > 0
)
$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
chisq_overall_dataarrange(UNI_TRNC, RSDY, RSTEST) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID
)
$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
chisq_overall_dataarrange(USUBJID, RSDY) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "OVRLRESP",
RSTESTCD == "RECIST 1.1"
RSCAT %>%
) # one instance of a duplicate--remove with distinct
distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
::filter(RSSTRESC != "BASELINE")
dplyr
<- imap(chisq_overall_data, ~{
chisq_test_overall_results %>%
.x select(RSEVALID, RSSTRESC) %>%
mutate(
RSSTRESC = toupper(RSSTRESC),
RSSTRESC = factor(RSSTRESC, levels = c("CR", "PR", "SD", "PD", "NE", "NON-CR/NON-PD"))
%>%
) table() %>%
chisq.test()
})
imap(chisq_test_overall_results, ~{
"observed"]] %>%
.x[[# addmargins() %>%
as.data.frame.matrix(row.names = extract2(attr(., "dimnames"), 1) ) %>%
rownames_to_column(var = "RSEVALID") %>%
::gt(rowname_col = "RSEVALID") %>%
gt::tab_header(title = glue::glue("{.y}: Contingency Table of Raters vs. RECIST Ratings")) %>%
gt::tab_footnote(
gtfootnote = glue::glue("{.x[['method']]} yields a p-value of {round(.x[['p.value']], 4)} on {.x[['parameter']]} degrees of freedom")
%>%
) ::tab_spanner(
gtlabel = "Evaluable",
columns = c("CR", "PR", "SD", "PD")
%>%
) ::tab_spanner(
gtlabel = "Not Evaluable",
columns = c("NE", "NON-CR/NON-PD")
) })
B.3.0.2 thesis_code/chisq_overall/friedman_overall.R
# Just source in the data prep from the other file
source("chisq_overall/chisq_overall.R")
# temp_data <- chisq_overall_data$NCT02395172 %>%
# select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
# pivot_wider(names_from = RSEVALID, values_from = RSSTRESC) %>%
# select(-1,-2) %>%
# na.omit() %>%
# # mutate(across(everything(), ~as.numeric(as.factor(.x)))) %>%
# as.matrix()
#
# friedman.test(temp_data)
map(chisq_overall_data, ~{
%>%
.x select(matches("USUBJID|UNI_ID"), RSDY, RSEVALID, RSSTRESC) %>%
pivot_wider(names_from = RSEVALID, values_from = RSSTRESC) %>%
select(-1,-2) %>%
na.omit() %>%
as.matrix() %>%
friedman.test()
})
B.3.0.3 thesis_code/chisq_overall/lme_overall.R
# Just source in the data prep from the other file
source("chisq_overall/chisq_overall.R")
library(lme4)
# temp_data <- mutate(
# chisq_overall_data$NCT03631706,
# RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
# )
#
# temp_model <- lmer(
# as.numeric(as.factor(RSSTRESC)) ~ RSEVALID + (1 | USUBJID), data = temp_data
# )
#
# emmeans::emmeans(
# temp_model,
# pairwise ~ RSEVALID
# )
<- map(chisq_overall_data, ~{
overall_outcomes_lme %<>%
.x rename("USUBJID" = matches("USUBJID|UNI_ID")) %>%
::filter(RSSTRESC != "NE") %>%
dplyrmutate(
RSSTRESC = factor(RSSTRESC, levels = c("PD","NON-CR/NON-PD","SD","PR","CR"))
)
<- mutate(
temp_data
.x,RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
<- lmer(
temp_model as.numeric(as.factor(RSSTRESC)) ~ RSEVALID + (1 | USUBJID), data = temp_data
)
<- emmeans::emmeans(
emmeans_model
temp_model,~ RSEVALID
pairwise
)
return(list(
mixed_model = temp_model,
emmeans = emmeans_model
))
})
<- overall_outcomes_lme %>%
lme_latex_equations map(~{
"mixed_model"]] %>%
.x[[::extract_eq(use_coefs = T)
equatiomatic
})
<- overall_outcomes_lme %>%
lme_contrasts_table_only map(~{
"emmeans"]][["contrasts"]] %>%
.x[[as.data.frame() %>%
tibble() # strip summary_emm class and associated info
})
saveRDS(
list(lme_latex_equations = lme_latex_equations,
lme_contrasts_table_only = lme_contrasts_table_only),
file = "transfer/lme_overall_results.rds"
)
B.4 Objective Response Rate Analyses
B.4.0.1 thesis_code/response_rate/objective_response_rate.R
# TO BE COMPLETED STILL
# Want to calculate the proportions by Rater for each study
# Then I want to do a Chi-squared test to see if there is a notable
# difference at all. If a difference is detected, then I could use the
# Marascuilo procedure which is a multiple comparison test used to determine
# which specific proprotions differ significantly from one another
<- list()
objective_response_rate_data
if (!"rs_files_with_multiple_raters" %in% ls()){
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
}
$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
objective_response_rate_dataarrange(USUBJID, RSDY) %>%
::filter(
dplyr== "OVRLRESP",
RSTESTCD != "" & !is.na(RSSTRESC),
RSSTRESC !RSEVALID %in% c(""),
!is.na(RSDY) & RSDY > 0
)
$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
objective_response_rate_dataarrange(UNI_TRNC, RSDY, RSTEST) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID
)
$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
objective_response_rate_dataarrange(USUBJID, RSDY) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "OVRLRESP",
RSTESTCD == "RECIST 1.1"
RSCAT %>%
) # one instance of a duplicate--remove with distinct
distinct(USUBJID, RSEVALID, RSDY, .keep_all = T)
<- map(objective_response_rate_data, ~{
orr_chisq_summarized_tables <- .x %>%
temp group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
summarize(
responder = any(RSSTRESC %in% c("CR", "PR"))
%>%
) group_by(RSEVALID) %>%
summarize(
count_true = sum(responder),
count_false = sum(responder == FALSE)
)
})
<- map(orr_chisq_summarized_tables, ~{
orr_chisq_tests <- as.matrix(select(.x, -1))
temp dimnames(temp) <- list(unique(.x[["RSEVALID"]]), c("count_true", "count_false"))
chisq.test(temp)
})
# saveRDS(objective_response_rate_data, "./presentation/objective_response_rate_data.rds")
$NCT02395172
orr_chisq_tests
imap(orr_chisq_tests, ~{
tibble(
Study = .y,
sample_size = sum(.x[["observed"]]),
p_value = round(.x[["p.value"]], 4),
test_statistic = round(.x[["statistic"]], 2),
df = .x[["parameter"]]
)%>%
}) bind_rows() %>%
::gt() %>%
gt::tab_header(title = glue::glue("Objective Response Rate: Comparing Raters using Chi-Squared Test")) %>%
gt::tab_footnote(
gtfootnote = "No differences in the distribution of rating participants as having an objective response was detected in any of the 3 trials. Failing to reject the null hypothesis of no difference, however, does not necessarily imply equivalence. Later analyses will look at this in more detail."
)
B.4.0.2 thesis_code/response_rate/orr_cochran_q.R
# Cochran's Q omnibus test
source("response_rate/objective_response_rate.R")
<- map(objective_response_rate_data, ~{
orr_cochrans_q %>%
.x group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
summarize(
responder = any(RSSTRESC %in% c("CR", "PR"))
%>%
) ungroup() %>%
pivot_wider(names_from = RSEVALID, values_from = responder) %>%
select(-1) %>%
as.matrix() %>%
na.omit() %>%
::cochrans.q()
nonpar
})
# Post-hoc McNemar pairwise tests
<- map(objective_response_rate_data, ~{
orr_mcnemars_tests <- .x %>%
data_prep group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
summarize(
responder = any(RSSTRESC %in% c("CR", "PR"))
%>%
) ungroup() %>%
pivot_wider(names_from = RSEVALID, values_from = responder) %>%
select(-1)
<- data_prep %>%
reader1_reader2 select("READER 1", "READER 2") %>%
table()
<- data_prep %>%
reader1_site select("READER 1", "SITE INVESTIGATOR") %>%
table()
<- data_prep %>%
reader2_site select("READER 2", "SITE INVESTIGATOR") %>%
table()
<- mcnemar.test(reader1_reader2)
mcnemar_reader1_reader2 <- mcnemar.test(reader1_site)
mcnemar_reader1_site <- mcnemar.test(reader2_site)
mcnemar_reader2_site
browser()
<- c(
p_vals reader1_reader2 = mcnemar_reader1_reader2$p.value,
reader1_site = mcnemar_reader1_site$p.value,
reader2_site = mcnemar_reader2_site$p.value
)
<- p.adjust(p_vals, method = "bonferroni")
adjusted_p_vals
return(list(
adjusted_p_vals = adjusted_p_vals,
two_way_tables = list(reader1_reader2 = reader1_reader2,
reader1_site = reader1_site,
reader2_site = reader2_site)
)
)
})
saveRDS(
list(orr_cochrans_q = orr_cochrans_q,
orr_mcnemars_tests = orr_mcnemars_tests
),file = "transfer/orr_cochrans_mcnemars.rds"
)
B.5 Survival Endpoint Analyses
B.5.0.1 thesis_code/survival_analyses/survival_data_duration_response.R
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
<- list()
survival_data_duration_response
$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
survival_data_duration_response::filter(
dplyr== "OVRLRESP",
RSTESTCD != "",
RSORRES > 0,
RSDY !is.na(RSEVALID), !RSEVALID == ""
%>%
) group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
mutate(
first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
%>%
) ::filter(any(first_response == TRUE)) %>%
dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
dplyrmutate(
time_interval = RSDY - lag(RSDY),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
%>%
) ::filter(!is.na(time_interval))
dplyr
$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
survival_data_duration_responsemutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) arrange(UNI_ID, RSDY) %>%
group_by(UNI_ID, RSEVALID) %>%
select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
mutate(
first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
%>%
) ::filter(any(first_response == TRUE)) %>%
dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
dplyrmutate(
time_interval = RSDY - lag(RSDY),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
%>%
) ::filter(!is.na(time_interval))
dplyr
$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
survival_data_duration_responsemutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP"
RSTESTCD %>%
) arrange(USUBJID, RSDY) %>%
group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
mutate(
first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
%>%
) ::filter(any(first_response == TRUE)) %>%
dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
dplyrmutate(
time_interval = RSDY - lag(RSDY),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
%>%
) ::filter(!is.na(time_interval))
dplyr
B.5.0.2 thesis_code/survival_analyses/survival_data_time_progression.R
<- list()
survival_data_progression
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
survival_data_progression::filter(
dplyr== "OVRLRESP",
RSTESTCD != "",
RSORRES > 0,
RSDY !is.na(RSEVALID), !RSEVALID == ""
%>%
) group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
::filter(
dplyr== "PD") |
(RSSTRESC != "PD" & RSDY == max(RSDY))
(RSSTRESC %>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(USUBJID, RSEVALID, RSDY) %>%
mutate(
general_status = if_else(RSSTRESC == "PD", 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
survival_data_progression# mutate(
# RSEVALID = if_else(RSEVAL == "SITE INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
# RSEVAL,
# RSEVALID)
# ) %>%
::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) arrange(UNI_ID, RSDY) %>%
group_by(UNI_ID, RSEVALID) %>%
select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
::filter(
dplyr%in% c("PD")) |
(RSSTRESC all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
(%>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(UNI_ID, RSEVALID, RSDY) %>%
mutate(
general_status = if_else(RSSTRESC %in% c("PD"), 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
survival_data_progressionmutate(
RSEVALID = if_else(RSEVAL == "SITE INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP"
RSTESTCD %>%
) arrange(USUBJID, RSDY) %>%
group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
::filter(
dplyr%in% c("PD")) |
(RSSTRESC all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
(%>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(USUBJID, RSEVALID, RSDY) %>%
mutate(
general_status = if_else(RSSTRESC %in% c("PD"), 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
B.5.0.3 thesis_code/survival_analyses/survival_data_time_response.R
<- list()
survival_data_response
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
survival_data_response::filter(
dplyr== "OVRLRESP",
RSTESTCD != "",
RSORRES > 0,
RSDY !is.na(RSEVALID), !RSEVALID == ""
%>%
) group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSORRES, RSSTRESC) %>%
::filter(
dplyr%in% c("PR", "CR")) |
(RSSTRESC all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
(%>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(USUBJID, RSEVALID, RSDY) %>%
mutate(
general_status = if_else(RSSTRESC %in% c("CR", "PR"), 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
survival_data_responsearrange(UNI_ID, RSEVAL) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP",
RSTESTCD %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
(RSGRPID %>%
) group_by(UNI_ID, RSEVALID) %>%
select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
::filter(
dplyr%in% c("PR", "CR")) |
(RSSTRESC all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
(%>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(UNI_ID, RSEVALID, RSDY) %>%
mutate(
general_status = if_else(RSSTRESC %in% c("PR", "CR"), 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
survival_data_responsearrange(USUBJID, RSEVALID) %>%
mutate(
RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
RSEVAL,
RSEVALID)%>%
) ::filter(
dplyr== "RECIST 1.1",
RSCAT == "OVRLRESP"
RSTESTCD %>%
) group_by(USUBJID, RSEVALID) %>%
select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
::filter(
dplyr%in% c("PR", "CR")) |
(RSSTRESC all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
(%>%
) ::filter(RSDY == min(RSDY)) %>%
dplyrarrange(USUBJID, RSEVALID) %>%
mutate(
general_status = if_else(RSSTRESC %in% c("PR", "CR"), 1, 0),
RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
)
B.5.0.4 thesis_code/survival_analyses/survival_duration_response_function.R
<- function(survival_data_duration_response, plot = TRUE) {
survival_duration_response imap(survival_data_duration_response, ~{
<- .x
data <- survminer::surv_fit(Surv(RSDY, first_disease_progression) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Duration of Response")
title <- levels(.x$RSEVALID)
legend_labels
if (plot) {
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)else {
} <- NULL
survival_plot
}
<- Surv(time = .x[["RSDY"]], event = .x[["first_disease_progression"]] )
surv_obj_overall
<- survminer::pairwise_survdiff(
log_rank_test Surv(time = RSDY, event = first_disease_progression) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- tryCatch(coxph(surv_obj_overall ~ RSEVALID, data = .x), error = function(e) NA)
cox_model <- tryCatch(emmeans::emmeans(cox_model, pairwise ~ RSEVALID), error = function(e) NA)
emmeans_result
return(
list(
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
}) }
B.5.0.5 thesis_code/survival_analyses/survival_duration_response.R
library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)
source("~/study_identification/survival_analyses/survival_data_duration_response.R")
<- imap(survival_data_duration_response, ~{
survival_duration_response
%<>%
.x rename(USUBJID = matches("UNI_ID|USBUBJID"))
<- .x
data <- survminer::surv_fit(Surv(time_interval, first_disease_progression) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Duration of Response")
title <- levels(.x$RSEVALID)
legend_labels
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
pval.method = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)
<- Surv(time = .x[["time_interval"]], event = .x[["first_disease_progression"]] )
surv_obj_overall
<- survdiff(
log_rank_test Surv(data$time_interval, data$first_disease_progression) ~ RSEVALID,
data = data
)
<- survminer::pairwise_survdiff(
pairwise_survdiff Surv(time = time_interval, event = first_disease_progression) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
cox_model <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)
emmeans_result
return(
list(
survival_progression_object = survival_progression_object,
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
pairwise_survdiff = pairwise_survdiff,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
})
# saveRDS(survival_duration_response, "./presentation/survival_duration_response_object.rds")
# survival_duration_response
imap(survival_duration_response,
~{ggcoxzph(
cox.zph(.x[["cox_model"]])
+
)labs(title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Response"))
ggsave(
paste0("./transfer/survival_images/", .y, "_dor_cox_zph.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
imap(survival_duration_response, ~
ggsurvplot(
{"survival_progression_object"]],
.x[[fun = "cloglog", # Complementary log-log = log(-log(S(t)))
palette = "Dark2",
xlab = "Time (Days)",
ylab = "log(-log(Survival))",
ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)+
) labs(
title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Response with log(-log(Survival)) check")
)
ggsave(
paste0("./transfer/survival_images/", .y, "_dor_cox_cloglog.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
map(survival_duration_response,
~.x[["log_rank_test"]]
)
imap(survival_duration_response, ~{
"survival_plot"]]
.x[[
# can't have combined plot and table saved with ggplot
# ggsave(
# paste0("./transfer/survival_images/", .y, "_dor_kaplan_meier.jpeg"),
# units = "px", width = 4160, height = 2880
# )
})
<- survival_duration_response %>%
survival_duration_response_meta_data map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) arrange(id) %>%
::filter(
dplyrstr_detect(id, "ONCOLOGIST|INVESTIGATOR")
)
saveRDS(survival_duration_response_meta_data, "./transfer/survival_dor_emmeans.rds")
# comparison_counts <- survival_data_duration_response %>%
# map(
# ~{
# data <- rename(.x, USUBJID = matches("UNI_ID|USUBJID"))
# distinct(data, USUBJID, RSEVALID) %>%
# group_by(USUBJID) %>%
# summarize(
# investigator_reader1 = sum(any(RSEVALID == "SITE INVESTIGATOR") & any(RSEVALID == "READER 1")),
# investigator_reader2 = sum(any(RSEVALID == "SITE INVESTIGATOR") & any(RSEVALID == "READER 2"))) %>%
# ungroup() %>%
# summarize(
# investigator_reader1 = sum(investigator_reader1 ),
# investigator_reader2 = sum(investigator_reader2)
# )
# }
# )
%>%
survival_duration_response map(~{
"cox_model"]][["n"]]
.x[[
})
<- rma(
rma_survival_duration_response yi = estimate,
sei = SE,
data = survival_duration_response_meta_data,
method = "REML",
slab = id
)
# Print summary of results
summary(rma_survival_duration_response)
# Create forest plot
forest(rma_survival_duration_response,
# atransf = exp,
main = "Duration of Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR)",
addpred = T)
funnel(rma_survival_duration_response,
# atransf = exp,
main = "Duration of Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR)")
# Define equivalence bounds
<- 0.8
eq_lower <- 1.25
eq_upper # Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))
# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
<- TOSTER::TOSTone.raw(
TOST_duration_of_response_result m = rma_survival_duration_response$beta[1],
mu = 0,
sd = rma_survival_duration_response$se * sqrt(6),
n = 6, # Placeholder, since we use raw values
low_eqbound = log(eq_lower),
high_eqbound = log(eq_upper),
alpha = 0.05
)
print(TOST_duration_of_response_result)
#
# surv_obj_overall <- Surv(time = survival_data_duration_response[[3]]$RSDY, event = survival_data_duration_response[[3]]$first_disease_progression)
#
# survfit(
# surv_obj_overall ~ RSEVALID,
# data = survival_data_duration_response[[3]]
# ) %>%
# survminer::ggsurvplot(risk.table = T, title = "Trial 3: Response Duration")
#
#
# cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = survival_data_duration_response[[3]])
#
# res <- survminer::pairwise_survdiff(
# Surv(time = RSDY, event = first_disease_progression) ~ RSEVALID,
# data = survival_data_duration_response[[3]],
# p.adjust.method = "BH"
# )
#
# res
#
# emmeans::emmeans(cox_model, pairwise ~ RSEVALID)
B.5.0.6 thesis_code/survival_analyses/survival_time_progression_function.R
<- function(survival_data_progression, plot = TRUE) {
survival_time_progression imap(survival_data_progression, ~{
<- .x
data <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Progression")
title <- levels(.x$RSEVALID)
legend_labels
if (plot) {
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
# legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)else {
} <- NULL
survival_plot
}
<- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )
surv_obj_overall
<- survdiff(
log_rank_test Surv(data$RSDY, data$general_status) ~ RSEVALID,
data = data
)
<- survminer::pairwise_survdiff(
pairwise_survdiff Surv(time = RSDY, event = general_status) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- coxph(surv_obj_overall ~ RSEVALID, data = .x)
cox_model <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)
emmeans_result
return(
list(
survival_progression_object = survival_progression_object,
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
pairwise_survdiff = pairwise_survdiff,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
}) }
B.5.0.7 thesis_code/survival_analyses/survival_time_progression.R
library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)
source("~/study_identification/survival_analyses/survival_data_time_progression.R")
<- imap(survival_data_progression, ~{
survival_progression
%<>%
.x rename(USUBJID = matches("UNI_ID|USBUBJID"))
<- .x
data <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Progression")
title <- levels(.x$RSEVALID)
legend_labels
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
pval.method = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)
<- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )
surv_obj_overall
<- survdiff(
log_rank_test Surv(data$RSDY, data$general_status) ~ RSEVALID,
data = data
)
<- survminer::pairwise_survdiff(
pairwise_survdiff Surv(time = RSDY, event = general_status) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
cox_model <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)
emmeans_result
return(
list(
survival_progression_object = survival_progression_object,
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
pairwise_survdiff = pairwise_survdiff,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
})
# saveRDS(survival_progression, "./presentation/survival_progression_object.rds")
imap(survival_progression,
~{ggcoxzph(
cox.zph(.x[["cox_model"]])
+
) labs(title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Progression"))
ggsave(
paste0("./transfer/survival_images/", .y, "_progression_cox_zph.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
imap(survival_progression, ~{
<- ggsurvplot(
temp "survival_progression_object"]],
.x[[fun = "cloglog", # Complementary log-log = log(-log(S(t)))
palette = c("#86251b", "#076d7e", "#6ad2e2"),
xlab = "Time",
ylab = "log(-log(Survival))",
ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)+
) labs(
title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Progression with log(-log(Survival)) check")
)
ggsave(
paste0("./transfer/survival_images/", .y, "_progression_cox_cloglog.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
map(survival_progression,
~{
"log_rank_test"]]
.x[[
}
)
imap(survival_progression, ~{
"survival_plot"]]
.x[[
# can't have combined plot and table saved with ggplot
# ggsave(
# paste0("./transfer/survival_images/", .y, "_progression_kaplan_meier.jpeg"),
# units = "px", width = 4160, height = 2880
# )
})
<- survival_progression %>%
survival_progression_meta_data map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) arrange(id) %>%
::filter(
dplyrstr_detect(id, "ONCOLOGIST|INVESTIGATOR")
)
saveRDS(survival_progression_meta_data, "./transfer/survival_progression_emmeans.rds")
<- rma(
rma_survival_progression yi = estimate,
sei = SE,
data = survival_progression_meta_data,
method = "REML",
slab = id
)
# Print summary of results
summary(rma_survival_progression)
# Create forest plot
forest(rma_survival_progression,
# atransf = exp,
main = "Time to Progression: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference",
addpred = T)
funnel(rma_survival_progression,
# atransf = exp,
main = "Time to Progression: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference")
# Define equivalence bounds
<- 0.8
eq_lower <- 1.25
eq_upper # Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))
# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
<- TOSTER::TOSTone.raw(
TOST_time_to_progression_result m = (rma_survival_progression$beta[1]),
mu = 0,
sd = rma_survival_progression$se * sqrt(6),
n = 6, # Placeholder, since we use raw values
low_eqbound = log(eq_lower),
high_eqbound = log(eq_upper),
alpha = 0.05
)
TOST_time_to_progression_result
B.5.0.8 thesis_code/survival_analyses/survival_time_response_function.R
<- function(survival_data_duration_response, plot = TRUE) {
survival_time_response imap(survival_data_duration_response, ~{
<- .x
data <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Time to Response")
title <- levels(.x$RSEVALID)
legend_labels
if (plot) {
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)else {
} <- NULL
survival_plot
}
<- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )
surv_obj_overall
<- survminer::pairwise_survdiff(
log_rank_test Surv(time = RSDY, event = general_status) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- tryCatch(coxph(surv_obj_overall ~ RSEVALID, data = .x), error = function(e) NA)
cox_model <- tryCatch(emmeans::emmeans(cox_model, pairwise ~ RSEVALID), error = function(e) NA)
emmeans_result
return(
list(
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
})
}
B.5.0.9 thesis_code/survival_analyses/survival_time_response.R
library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)
source("~/study_identification/survival_analyses/survival_data_time_response.R")
<- imap(survival_data_response, ~{
survival_response
%<>%
.x rename(USUBJID = matches("UNI_ID|USBUBJID"))
<- .x
data <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)
survival_progression_object
<- paste0(.y, ": Response")
title <- levels(.x$RSEVALID)
legend_labels
<- survminer::ggsurvplot(
survival_plot
survival_progression_object,data = data,
tables.theme = theme_cleantable(),
risk.table = T,
pval = TRUE,
pval.method = TRUE,
title = title,
palette = c("#86251b", "#076d7e", "#6ad2e2"),
legend.title = "Tumor Response Evaluator:",
legend.labs = legend_labels,
xlab = "Time (Days)",
ggtheme = theme_bw() +
theme(
plot.title = element_text(size = 18, hjust = 0.5),
legend.title = element_text(size = 13),
legend.text = element_text(size = 13),
axis.title = element_text(size = 14),
axis.text = element_text(size = 11)
)
)
<- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )
surv_obj_overall
<- survdiff(
log_rank_test Surv(data$RSDY, data$general_status) ~ RSEVALID,
data = data
)
<- survminer::pairwise_survdiff(
pairwise_survdiff Surv(time = RSDY, event = general_status) ~ RSEVALID,
data = .x,
p.adjust.method = "BH"
)
<- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
cox_model <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)
emmeans_result
return(
list(
survival_progression_object = survival_progression_object,
survival_plot = survival_plot,
surv_obj_overall = surv_obj_overall,
log_rank_test = log_rank_test,
pairwise_survdiff = pairwise_survdiff,
cox_model = cox_model,
emmeans_result = emmeans_result
)
)
})
# saveRDS(survival_response, "./presentation/survival_response_object.rds")
# survival_response
imap(survival_response,
~{ggcoxzph(
cox.zph(.x[["cox_model"]])
+
)labs(title = paste0(.y, ": Test of proportional hazards assumption for Cox regression of Duration of Response"))
ggsave(
paste0("./transfer/survival_images/", .y, "_response_cox_zph.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
imap(survival_response, ~
ggsurvplot(
{"survival_progression_object"]],
.x[[fun = "cloglog", # Complementary log-log = log(-log(S(t)))
palette = "Dark2",
xlab = "Time",
ylab = "log(-log(Survival))",
ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
legend.title = element_text(size = 15),
plot.title = element_text(size = 16),
axis.title = element_text(size = 14),
axis.text = element_text(size = 12)
)+
)labs(
title = paste0(.y, ": Test of PH assumption for Cox regression of Duration of Response with log(-log(Survival)) check")
)
ggsave(
paste0("./transfer/survival_images/", .y, "_response_cox_cloglog.jpeg"),
units = "px", width = 4160, height = 2880
)
}
)
map(survival_response,
~.x[["log_rank_test"]]
)
imap(survival_response, ~{
"survival_plot"]]
.x[[
# can't have combined plot and table saved with ggplot
# ggsave(
# paste0("./transfer/survival_images/", .y, "_response_kaplan_meier.jpeg"),
# units = "px", width = 4160, height = 2880
# )
})
<- survival_response %>%
survival_response_meta_data map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) arrange(id) %>%
::filter(
dplyrstr_detect(id, "ONCOLOGIST|INVESTIGATOR")
)
saveRDS(survival_response_meta_data, "./transfer/survival_response_emmeans.rds")
<- rma(
rma_survival_response yi = estimate,
sei = SE,
data = survival_response_meta_data,
method = "REML",
slab = id
)
# Print summary of results
summary(rma_survival_response)
# Create forest plot
forest(rma_survival_response,
# atransf = exp,
main = "Time to Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference",
addpred = T)
funnel(rma_survival_response,
# atransf = exp,
main = "Time to Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference")
# Define equivalence bounds
<- 0.8
eq_lower <- 1.25
eq_upper # Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))
# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
<- TOSTER::TOSTone.raw(
TOST_time_to_response_result m = (rma_survival_response$beta[1]),
mu = 0,
sd = rma_survival_response$se * sqrt(6),
n = 6, # Placeholder, since we use raw values
low_eqbound = log(eq_lower),
high_eqbound = log(eq_upper),
alpha = 0.05
)
print(TOST_time_to_response_result)
B.5.0.10 thesis_code/survival_analyses/tost_plots.R
= log(0.8)
lower_bound = log(1.25)
upper_bound
# Line segments from estimate to CI bounds
<- upper_bound - lower_bound
x_range <- lower_bound - 0.25 * x_range
x_min <- upper_bound + 0.25 * x_range
x_max
<- map2(
tost_data list(TOST_time_to_progression_result, TOST_time_to_response_result, TOST_duration_of_response_result),
list("Time to Progression", "Time to Response", "Duration of Response"),
~{
list(
data.frame(
estimate = .x[["diff"]],
CI_lower = .x[["LL_CI_TOST"]],
CI_upper = .x[["UL_CI_TOST"]],
y= 0
),
.y
)
})
<- function(data, title) {
plot_tost_results ggplot(data, aes(x = estimate, y = y)) +
# Line segments from estimate to CI bounds
geom_segment(aes(x = estimate, xend = CI_lower, y = y, yend = y), size = 1.2) +
geom_segment(aes(x = estimate, xend = CI_upper, y = y, yend = y), size = 1.2) +
# Point for estimate
geom_point(size = 4, shape = 21, fill = "black") +
# Equivalence bounds (same color)
geom_vline(xintercept = lower_bound, linetype = "dashed", color = "darkgreen", size = 1) +
geom_vline(xintercept = upper_bound, linetype = "dashed", color = "darkgreen", size = 1) +
# Aesthetics
scale_y_continuous(NULL, breaks = NULL) +
scale_x_continuous(limits = c(x_min, x_max)) +
labs(x = "ln(Hazard Ratio) Difference",
title = paste("Two One-Sided Tests:", title),
subtitle = "90% Confidence Interval of Estimate"
+
) theme_minimal() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5)
)
}
map(tost_data, ~plot_tost_results(.x[[1]], .x[[2]]))
B.6 Sensitivity Analyses
B.6.0.1 thesis_code/simulate_thresholds/irr_sim_study_one.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_one_simulation_data" %in% ls()){
# tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }
#
# ################################ TARGET OUTCOME SIMULATION
#
#
#
# irr_simulation_target_outcome_study_one <- tr_study_one_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, target_response_calculated ) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
# saveRDS(irr_simulation_target_outcome_study_one, "./simulate_thresholds/irr_simulation_target_outcome_study_one.rds")
if (!"irr_simulation_target_outcome_study_one" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_one.rds")
irr_simulation_target_outcome_study_one
}
<- all_combos %>%
study_one_original_irr_id mutate(
id = row_number()
%>%
) ::filter(
dplyr== 0.2,
progression_threshold == 0.3
partial_response_threshold %>%
) pull(id)
<- irr_simulation_target_outcome_study_one[[study_one_original_irr_id]]$value
study_one_original_irr_value_target
%>%
all_combos mutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_one, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_one_original_irr_value_target,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_one_original_irr_value_target
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Target* Tumor Tumor Response in Study 1",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Kappa\nDeviation"
)
###################################### OVERALL OUTCOME SIMULATION
# irr_simulation_overall_outcome_study_one <- tr_study_one_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, overall_response_calculated ) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response_calculated ) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_one, "./simulate_thresholds/irr_simulation_overall_outcome_study_one.rds")
if (!"irr_simulation_overall_outcome_study_one" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_one.rds")
irr_simulation_overall_outcome_study_one
}
<- irr_simulation_overall_outcome_study_one[[study_one_original_irr_id]]$value
study_one_original_irr_value_overall
%>%
all_combos mutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_one, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_one_original_irr_value_overall,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_one_original_irr_value_overall
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Overall* Response in Study 1",
# caption = "The intersecting black lines at x=0.20 and y=0.30 represent the actual RECIST threshold values (i.e. 20% disease progression and 30% response/decline in SLD). The values for the heatmap are thus the difference between the IRR value at the actual RECIST criteria values and the value calculated at the given combination of disease progression and disease response. This Overall Response plot also incorporates information from New Target and Non-Target Lesions as well as the recalculated Target Lesion values.",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Kappa\nDeviation"
)
B.6.0.2 thesis_code/simulate_thresholds/irr_sim_study_three.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_three_simulation_data" %in% ls()){
# tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
#
<- list()
study_three_simulation #
#
# ################################ TARGET OUTCOME SIMULATION
#
#
# irr_simulation_target_outcome_study_three <- tr_study_three_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, target_response_calculated) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
# saveRDS(irr_simulation_target_outcome_study_three, "./simulate_thresholds/irr_simulation_target_outcome_study_three.rds")
#
if (!"irr_simulation_target_outcome_study_three" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_three.rds")
irr_simulation_target_outcome_study_three
}
<- all_combos %>%
study_three_original_irr_id mutate(
id = row_number()
%>%
) ::filter(
dplyr== 0.2,
progression_threshold == 0.3
partial_response_threshold %>%
) pull(id)
<- irr_simulation_target_outcome_study_three[[study_three_original_irr_id]]$value
study_three_original_irr_value_target
$plot_target_outcome <- all_combos %>%
study_three_simulationmutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_three, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_target,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_target
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Target* Response in Study 3"
)
###################################### OVERALL OUTCOME SIMULATION
# irr_simulation_overall_outcome_study_three <- tr_study_three_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, overall_response) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_three, "./simulate_thresholds/irr_simulation_overall_outcome_study_three.rds")
if (!"irr_simulation_overall_outcome_study_three" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_three.rds")
irr_simulation_overall_outcome_study_three
}
<- irr_simulation_overall_outcome_study_three[[study_three_original_irr_id]]$value
study_three_original_irr_value_overall
$plot_overall_outcome <- all_combos %>%
study_three_simulationmutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_three, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_overall,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_overall
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Overall* Response in Study 3")
study_three_simulation
# saveRDS(study_three_simulation, "./presentation/study_three_simulation.rds")
B.6.0.3 thesis_code/simulate_thresholds/irr_sim_study_two.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_two_simulation_data" %in% ls()){
# tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }
#
# ################################ TARGET OUTCOME SIMULATION
#
#
#
# irr_simulation_target_outcome_study_two <- tr_study_two_simulation_data %>%
# map(~{
# .x %>%
# select(UNI_ID.x, RSDY, RSEVALID, target_response_calculated ) %>%
# tidyr::unite("Unique_Observation", c("UNI_ID.x", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
# saveRDS(irr_simulation_target_outcome_study_two, "./simulate_thresholds/irr_simulation_target_outcome_study_two.rds")
if (!"irr_simulation_target_outcome_study_two" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_two.rds")
irr_simulation_target_outcome_study_two
}
<- all_combos %>%
study_two_original_irr_id mutate(
id = row_number()
%>%
) ::filter(
dplyr== 0.2,
progression_threshold == 0.3
partial_response_threshold %>%
) pull(id)
<- irr_simulation_target_outcome_study_two[[study_two_original_irr_id]]$value
study_two_original_irr_value_target
%>%
all_combos mutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_two, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_two_original_irr_value_target,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_two_original_irr_value_target
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Target* Tumor Tumor Response in Study 2"
)
###################################### OVERALL OUTCOME SIMULATION
#
# irr_simulation_overall_outcome_study_two <- tr_study_two_simulation_data %>%
# map(~{
# .x %>%
# select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated ) %>%
# tidyr::unite("Unique_Observation", c("UNI_ID.x", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response_calculated ) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_two, "./simulate_thresholds/irr_simulation_overall_outcome_study_two.rds")
if (!"irr_simulation_overall_outcome_study_two" %in% ls()){
<- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_two.rds")
irr_simulation_overall_outcome_study_two
}
<- irr_simulation_overall_outcome_study_two[[study_two_original_irr_id]]$value
study_two_original_irr_value_overall
%>%
all_combos mutate(
fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_two, ~.x[["value"]]),
fleiss_kappa_deviation = fleiss_kappa_simulation - study_two_original_irr_value_overall,
fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_two_original_irr_value_overall
%>%
) ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0 # Ensures color transition at zero
+
) theme_bw() +
labs(
title = "Effect of Varying Progression and Response Thresholds on IRR",
subtitle = "RECIST 1.1 *Overall* Response in Study 2",
# caption = "The intersecting black lines at x=0.20 and y=0.30 represent the actual RECIST threshold values (i.e. 20% disease progression and 30% response/decline in SLD). The values for the heatmap are thus the difference between the IRR value at the actual RECIST criteria values and the value calculated at the given combination of disease progression and disease response. This Overall Response plot also incorporates information from New Target and Non-Target Lesions as well as the recalculated Target Lesion values."
)
B.6.0.4 thesis_code/simulate_thresholds/orr_sim_study_one.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
if (!"tr_study_one_simulation_data" %in% ls()){
<- readRDS("tr_study_one_simulation_data.rds")
tr_study_one_simulation_data
}
<- map(tr_study_one_simulation_data, ~{
orr_study_one_sims <- .x %>%
temp group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
summarize(
responder = any(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
%>%
) ungroup() %>%
pivot_wider(names_from = RSEVALID, values_from = responder) %>%
select(-1) %>%
as.matrix() %>%
na.omit() %>%
::cochrans.q()
nonparreturn(c(q = temp@TestStat, p = temp@PVal))
})
%>%
orr_study_one_sims map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
# saveRDS(orr_study_one_sims, "simulate_thresholds/orr_study_one_sims.rds")
if (!"orr_study_one_sims" %in% ls()) {
<- readRDS("simulate_thresholds/orr_study_one_sims.rds")
orr_study_one_sims
}
tibble(
p_values = map_dbl(orr_study_one_sims, ~.x[["p"]] %>% str_remove("The p-value is ") %>% as.numeric()),
statistic = map_dbl(orr_study_one_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
%>%
) bind_cols(all_combos) %>%
ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0.05 # Ensures color transition at zero
+
) labs(
title = "NCT02395172: Chi-Squared Test p-values of Objective Response Rate",
subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
x = "Disease Progression Threshold",
y = "Parial Response Threshold"
+
) theme_bw()
B.6.0.5 thesis_code/simulate_thresholds/orr_sim_study_three.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
if (!"tr_study_three_simulation_data" %in% ls()){
<- readRDS("tr_study_three_simulation_data.rds")
tr_study_three_simulation_data
}
<- map(tr_study_three_simulation_data, ~{
orr_study_three_sims <- .x %>%
temp group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
summarize(
responder = any(overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
%>%
) ungroup() %>%
pivot_wider(names_from = RSEVALID, values_from = responder) %>%
select(-1) %>%
as.matrix() %>%
na.omit() %>%
::cochrans.q()
nonparreturn(c(q = temp@TestStat, p = temp@PVal))
})
%>%
orr_study_three_sims map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
# saveRDS(orr_study_three_sims, "simulate_thresholds/orr_study_three_sims.rds")
if (!"orr_study_three_sims" %in% ls()) {
<- readRDS("simulate_thresholds/orr_study_three_sims.rds")
orr_study_three_sims
}
tibble(
p_values = map_dbl(orr_study_three_sims, ~.x[["p"]] %>% str_remove("The p-value is ") %>% as.numeric()),
statistic = map_dbl(orr_study_three_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
%>%
) bind_cols(all_combos) %>%
ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0.05 # Ensures color transition at zero
+
) labs(
title = "NCT03631706: Chi-Squared Test p-values of Objective Response Rate",
subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
x = "Disease Progression Threshold",
y = "Parial Response Threshold"
+
) theme_bw()
B.6.0.6 thesis_code/simulate_thresholds/orr_sim_study_two.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
if (!"tr_study_two_simulation_data" %in% ls()){
<- readRDS("tr_study_two_simulation_data.rds")
tr_study_two_simulation_data
}
<- map(tr_study_two_simulation_data, ~{
orr_study_two_sims <- .x %>%
temp group_by(across(any_of(c("USUBJID", "UNI_ID.x"))), RSEVALID) %>%
summarize(
responder = any(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
%>%
) ungroup() %>%
pivot_wider(names_from = RSEVALID, values_from = responder) %>%
select(-1) %>%
as.matrix() %>%
na.omit() %>%
::cochrans.q()
nonparreturn(c(q = temp@TestStat, p = temp@PVal))
})
%>%
orr_study_two_sims map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric()) %>%
sd()
# saveRDS(orr_study_two_sims, "simulate_thresholds/orr_study_two_sims.rds")
if (!"orr_study_two_sims" %in% ls()) {
<- readRDS("simulate_thresholds/orr_study_two_sims.rds")
orr_study_two_sims
}
tibble(
p_values = map_dbl(orr_study_two_sims, ~.x[["p"]] %>% str_remove("The p-value is ") %>% as.numeric()),
statistic = map_dbl(orr_study_two_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
%>%
) bind_cols(all_combos) %>%
ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red", # Color for positive values
midpoint = 0.05 # Ensures color transition at zero
+
) labs(
title = "NCT03434379: Chi-Squared Test p-values of Objective Response Rate",
subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
x = "Disease Progression Threshold",
y = "Parial Response Threshold"
+
) theme_bw()
B.6.0.7 thesis_code/simulate_thresholds/survival_response_duration_sim_study_one.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_one_simulation_data" %in% ls()){
# tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")
# study_one_resonse_duration_sim <- map(tr_study_one_simulation_data, ~{
# .x %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response_calculated ) %>%
# mutate(
# first_response = (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
# first_disease_progression = (overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
# end_date = first_disease_progression | (all(!overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(any(first_response == TRUE)) %>%
# dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
# mutate(
# time_interval = RSDY - dplyr::lag(RSDY)
# ) %>%
# dplyr::filter(!is.na(time_interval)) %>%
# list() %>%
# survival_duration_response(plot = F)
# }
# )
#
#
#
#
# saveRDS(study_one_resonse_duration_sim, "simulate_thresholds/study_one_resonse_duration_sim.rds")
if (!"study_one_resonse_duration_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_one_resonse_duration_sim.rds")
study_one_resonse_duration_sim
}
1]] %>%
study_one_resonse_duration_sim[[map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) ::filter()
dplyr
coef(study_one_resonse_duration_sim[[100]][[1]]$cox_model)
<- map_df(study_one_resonse_duration_sim, ~{
study_one_resonse_duration_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_one_resonse_duration_sim_coefs %>%
study_one_actual_average_difference_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_one_resonse_duration_sim_coefs mutate(
difference_from_actual = study_one_actual_average_difference_dor - average
)
saveRDS(study_one_resonse_duration_sim_coefs,
"./transfer/simulations/dor/study_one_dor_emmeans.rds"
)
%>%
study_one_resonse_duration_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 1 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_one_resonse_duration_sim, ~{
study_one_resonse_duration_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_one_resonse_duration_sim_coefs %>%
study_one_actual_average_difference_filtered_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_one_resonse_duration_sim_coefs_filtered mutate(
difference_from_actual = study_one_actual_average_difference_filtered_dor - average
)
saveRDS(study_one_resonse_duration_sim_coefs_filtered,
"./transfer/simulations/dor/study_one_dor_filtered_emmeans.rds"
)
%>%
study_one_resonse_duration_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 1 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.8 thesis_code/simulate_thresholds/survival_response_duration_sim_study_three.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_three_simulation_data" %in% ls()){
# tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")
# study_three_resonse_duration_sim <- map(tr_study_three_simulation_data, ~{
# .x %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response ) %>%
# mutate(
# first_response = (overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
# first_disease_progression = (overall_response %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
# end_date = first_disease_progression | (all(!overall_response %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(any(first_response == TRUE)) %>%
# dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
# mutate(
# time_interval = RSDY - dplyr::lag(RSDY)
# ) %>%
# dplyr::filter(!is.na(time_interval)) %>%
# list() %>%
# survival_duration_response(plot = F)
# }
# )
#
#
#
#
# saveRDS(study_three_resonse_duration_sim, "simulate_thresholds/study_three_resonse_duration_sim.rds")
if (!"study_three_resonse_duration_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_three_resonse_duration_sim.rds")
study_three_resonse_duration_sim
}
1]] %>%
study_three_resonse_duration_sim[[map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) ::filter()
dplyr
coef(study_three_resonse_duration_sim[[100]][[1]]$cox_model)
<- map_df(study_three_resonse_duration_sim, ~{
study_three_resonse_duration_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_three_resonse_duration_sim_coefs %>%
study_three_actual_average_difference_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_three_resonse_duration_sim_coefs mutate(
difference_from_actual = study_three_actual_average_difference_dor - average
)
saveRDS(study_three_resonse_duration_sim_coefs,
"./transfer/simulations/dor/study_three_dor_emmeans.rds"
)
%>%
study_three_resonse_duration_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 3 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_three_resonse_duration_sim, ~{
study_three_resonse_duration_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_three_resonse_duration_sim_coefs %>%
study_three_actual_average_difference_filtered_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_three_resonse_duration_sim_coefs_filtered mutate(
difference_from_actual = study_three_actual_average_difference_filtered_dor - average
)
saveRDS(study_three_resonse_duration_sim_coefs_filtered,
"./transfer/simulations/dor/study_three_dor_filtered_emmeans.rds"
)
%>%
study_three_resonse_duration_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 3 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.9 thesis_code/simulate_thresholds/survival_response_duration_sim_study_two.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_two_simulation_data" %in% ls()){
# tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")
# study_two_resonse_duration_sim <- map(tr_study_two_simulation_data, ~{
# .x %>%
# group_by(UNI_ID.x, RSEVALID) %>%
# select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated ) %>%
# mutate(
# first_response = (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
# first_disease_progression = (overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
# end_date = first_disease_progression | (all(!overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(any(first_response == TRUE)) %>%
# dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
# mutate(
# time_interval = RSDY - dplyr::lag(RSDY)
# ) %>%
# dplyr::filter(!is.na(time_interval)) %>%
# list() %>%
# survival_duration_response(plot = F)
# }
# )
#
#
#
#
# saveRDS(study_two_resonse_duration_sim, "simulate_thresholds/study_two_resonse_duration_sim.rds")
if (!"study_two_resonse_duration_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_two_resonse_duration_sim.rds")
study_two_resonse_duration_sim
}
1]] %>%
study_two_resonse_duration_sim[[map(~map(.x["emmeans_result"], ~{
as.data.frame(.x$contrasts)
%>%
})) unlist(recursive = FALSE) %>%
list_rbind(names_to = "id") %>%
mutate(id = str_remove(id, ".emmeans_result") %>%
paste0(., ": ", contrast)
%>%
) ::filter()
dplyr
coef(study_two_resonse_duration_sim[[100]][[1]]$cox_model)
<- map_df(study_two_resonse_duration_sim, ~{
study_two_resonse_duration_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_two_resonse_duration_sim_coefs %>%
study_two_actual_average_difference_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_two_resonse_duration_sim_coefs mutate(
difference_from_actual = study_two_actual_average_difference_dor - average
)
saveRDS(study_two_resonse_duration_sim_coefs,
"./transfer/simulations/dor/study_two_dor_emmeans.rds"
)
%>%
study_two_resonse_duration_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 2 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_two_resonse_duration_sim, ~{
study_two_resonse_duration_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_two_resonse_duration_sim_coefs %>%
study_two_actual_average_difference_filtered_dor ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_two_resonse_duration_sim_coefs_filtered mutate(
difference_from_actual = study_two_actual_average_difference_filtered_dor - average
)
saveRDS(study_two_resonse_duration_sim_coefs_filtered,
"./transfer/simulations/dor/study_two_dor_filtered_emmeans.rds"
)
# Note that only 17 events were observed for the actual RECIST guideline
# and calculations for the difference from there are based on the original value
# in the plot even though that space is greyed out in the filtered plot.
%>%
study_two_resonse_duration_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 2 -- Duration of Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.10 thesis_code/simulate_thresholds/survival_time_progression_sim_study_one.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_one_simulation_data" %in% ls()){
# tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")
#
# study_one_progression_sim <- map(tr_study_one_simulation_data, ~{
# .x %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response_calculated) %>%
# dplyr::filter(
# (overall_response_calculated == "PROGRESSIVE DISEASE (PD)") |
# (overall_response_calculated != "PROGRESSIVE DISEASE (PD)" & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(USUBJID, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response_calculated == "PROGRESSIVE DISEASE (PD)", 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
# ) %>%
# list() %>%
# survival_time_progression(plot = F)
# }
# )
#
#
#
# saveRDS(study_one_progression_sim, "simulate_thresholds/study_one_progression_sim.rds")
if (!"study_one_progression_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_one_progression_sim.rds")
study_one_progression_sim
}
<- map_df(study_one_progression_sim, ~{
study_one_progression_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_one_progression_sim_coefs %>%
study_one_actual_average_difference_progression ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_one_progression_sim_coefs mutate(
difference_from_actual = study_one_actual_average_difference - average
)
saveRDS(study_one_progression_sim_coefs,
"./transfer/simulations/progression/study_one_progression_emmeans.rds"
)
%>%
study_one_progression_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 1 -- Time to Progression: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.11 thesis_code/simulate_thresholds/survival_time_progression_sim_study_three.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_three_simulation_data" %in% ls()){
# tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")
# study_three_progression_sim <- map(tr_study_three_simulation_data, ~{
# .x %>%
# arrange(USUBJID, RSDY) %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response) %>%
# dplyr::filter(
# (overall_response %in% c("PROGRESSIVE DISEASE (PD)")) |
# (all(!overall_response %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(USUBJID, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response %in% c("PROGRESSIVE DISEASE (PD)"), 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
# ) %>%
# list() %>%
# survival_time_progression(plot = F)
# }
# )
# saveRDS(study_three_progression_sim, "simulate_thresholds/study_three_progression_sim.rds")
if (!"study_three_progression_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_three_progression_sim.rds")
study_three_progression_sim
}
<- map_df(study_three_progression_sim, ~{
study_three_progression_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_three_progression_sim_coefs %>%
study_three_actual_average_difference ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_three_progression_sim_coefs mutate(
difference_from_actual = study_three_actual_average_difference - average
)
saveRDS(study_three_progression_sim_coefs,
"./transfer/simulations/progression/study_three_progression_emmeans.rds"
)
%>%
study_three_progression_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 3 -- Time to Progression: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.12 thesis_code/simulate_thresholds/survival_time_progression_sim_study_two.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_two_simulation_data" %in% ls()){
# tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")
# study_two_progression_sim <- map(tr_study_two_simulation_data, ~{
# .x %>%
# arrange(UNI_ID.x, RSDY) %>%
# group_by(UNI_ID.x, RSEVALID) %>%
# select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated) %>%
# dplyr::filter(
# (overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) |
# (all(!overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(UNI_ID.x, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)"), 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "RADIOLOGIST 1", "RADIOLOGIST 2"))
# ) %>%
# list() %>%
# survival_time_progression(plot = F)
# }
# )
# saveRDS(study_two_progression_sim, "simulate_thresholds/study_two_progression_sim.rds")
if (!"study_two_progression_sim" %in% ls()) {
<- readRDS("simulate_thresholds/study_two_progression_sim.rds")
study_two_progression_sim
}
<- map_df(study_two_progression_sim, ~{
study_two_progression_sim_coefs tryCatch(
coef(.x[[1]][["cox_model"]]),
error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_two_progression_sim_coefs %>%
study_two_actual_average_difference ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_two_progression_sim_coefs mutate(
difference_from_actual = study_two_actual_average_difference - average
)
saveRDS(study_two_progression_sim_coefs,
"./transfer/simulations/progression/study_two_progression_emmeans.rds"
)
%>%
study_two_progression_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 2 -- Time to Progression: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.13 thesis_code/simulate_thresholds/survival_time_response_sim_study_one.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_one_simulation_data" %in% ls()){
# tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")
# study_one_time_response_sim <- map(tr_study_one_simulation_data, ~{
# .x %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response_calculated ) %>%
# dplyr::filter(
# (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
# (all(!overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(USUBJID, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
# ) %>%
# list()
# }
# )
#
# study_one_time_response_sim %<>%
# map(~survival_time_response(.x, plot = F))
#
# saveRDS(study_one_time_response_sim, "simulate_thresholds/study_one_time_response_sim.rds")
if (!"study_one_time_response_sim.rds" %in% ls()) {
<- readRDS("simulate_thresholds/study_one_time_response_sim.rds")
study_one_time_response_sim
}
<- map_df(study_one_time_response_sim, ~{
study_one_time_response_sim_coefs tryCatch({
coef(.x[[1]][["cox_model"]])
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_one_time_response_sim_coefs %>%
study_one_actual_average_difference_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_one_time_response_sim_coefs mutate(
difference_from_actual = study_one_actual_average_difference_response - average
)
saveRDS(study_one_time_response_sim_coefs,
"./transfer/simulations/response/study_one_response_emmeans.rds"
)
%>%
study_one_time_response_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 1 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_one_time_response_sim, ~{
study_one_time_response_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_one_time_response_sim_coefs %>%
study_one_actual_average_difference_filtered_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_one_time_response_sim_coefs_filtered mutate(
difference_from_actual = study_one_actual_average_difference_filtered_response - average
)
saveRDS(study_one_time_response_sim_coefs_filtered,
"./transfer/simulations/response/study_one_response_filtered_emmeans.rds"
)
%>%
study_one_time_response_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 1 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.14 thesis_code/simulate_thresholds/survival_time_response_sim_study_three.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_three_simulation_data" %in% ls()){
# tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")
# study_three_time_response_sim <- map(tr_study_three_simulation_data, ~{
# .x %>%
# group_by(USUBJID, RSEVALID) %>%
# select(USUBJID, RSDY, RSEVALID, overall_response) %>%
# dplyr::filter(
# (overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
# (all(!overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(USUBJID, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
# ) %>%
# list() %>%
# survival_time_response(plot = F)
# }
# )
#
#
#
#
# saveRDS(study_three_time_response_sim, "simulate_thresholds/study_three_time_response_sim.rds")
if (!"study_three_time_response_sim.rds" %in% ls()) {
<- readRDS("simulate_thresholds/study_three_time_response_sim.rds")
study_three_time_response_sim
}
# some of the results are wild at the high partial response threshold values
# and we should consider setting a minimum number of events to have a valid
# analysis of the HRs
1200]][[1]]$cox_model$nevent
study_three_time_response_sim[[
<- map_df(study_three_time_response_sim, ~{
study_three_time_response_sim_coefs tryCatch({
coef(.x[[1]][["cox_model"]])
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_three_time_response_sim_coefs %>%
study_three_actual_average_difference_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_three_time_response_sim_coefs mutate(
difference_from_actual = study_three_actual_average_difference_response - average
)
saveRDS(study_three_time_response_sim_coefs,
"./transfer/simulations/response/study_three_response_emmeans.rds"
)
%>%
study_three_time_response_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 3 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_three_time_response_sim, ~{
study_three_time_response_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_three_time_response_sim_coefs %>%
study_three_actual_average_difference_filtered_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_three_time_response_sim_coefs_filtered mutate(
difference_from_actual = study_three_actual_average_difference_filtered_response - average
)
saveRDS(study_three_time_response_sim_coefs_filtered,
"./transfer/simulations/response/study_three_response_filtered_emmeans.rds"
)
%>%
study_three_time_response_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 3 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.15 thesis_code/simulate_thresholds/survival_time_response_sim_study_two.R
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# if (!"tr_study_two_simulation_data" %in% ls()){
# tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }
library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")
# study_two_time_response_sim <- map(tr_study_two_simulation_data, ~{
# .x %>%
# group_by(UNI_ID.x, RSEVALID) %>%
# select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated ) %>%
# dplyr::filter(
# (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
# (all(!overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
# ) %>%
# dplyr::filter(RSDY == min(RSDY)) %>%
# arrange(UNI_ID.x, RSEVALID, RSDY) %>%
# mutate(
# general_status = if_else(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
# RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "RADIOLOGIST 1", "RADIOLOGIST 2"))
# ) %>%
# list()
# }
# )
#
#
# study_two_time_response_sim %<>%
# map(~survival_time_response(.x, plot = F))
#
#
#
# saveRDS(study_two_time_response_sim, "simulate_thresholds/study_two_time_response_sim.rds")
if (!"study_two_time_response_sim.rds" %in% ls()) {
<- readRDS("simulate_thresholds/study_two_time_response_sim.rds")
study_two_time_response_sim
}
<- map_df(study_two_time_response_sim, ~{
study_two_time_response_sim_coefs tryCatch({
coef(.x[[1]][["cox_model"]])
},error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_two_time_response_sim_coefs %>%
study_two_actual_average_difference_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_two_time_response_sim_coefs mutate(
difference_from_actual = study_two_actual_average_difference_response - average
)
saveRDS(study_two_time_response_sim_coefs,
"./transfer/simulations/response/study_two_response_emmeans.rds"
)
%>%
study_two_time_response_sim_coefs ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 2 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
#### FILTER FOR FEWER THAN 20 EVENTS
<- map_df(study_two_time_response_sim, ~{
study_two_time_response_sim_coefs_filtered tryCatch({
<- .x[[1]][["cox_model"]]
cox_model if (cox_model$nevent < 20) {
return(tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA))
else {
} return(coef(cox_model))
}
},error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
)%>%
}) mutate(average = rowMeans(.)) %>%
bind_cols(all_combos)
<- study_two_time_response_sim_coefs %>%
study_two_actual_average_difference_filtered_response ::filter(
dplyr== 0.20,
progression_threshold == 0.30
partial_response_threshold %>%
) pull(average)
%<>%
study_two_time_response_sim_coefs_filtered mutate(
difference_from_actual = study_two_actual_average_difference_filtered_response - average
)
saveRDS(study_two_time_response_sim_coefs_filtered,
"./transfer/simulations/response/study_two_response_filtered_emmeans.rds"
)
%>%
study_two_time_response_sim_coefs_filtered ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
geom_tile() +
# scale_fill_distiller(palette = "RdBu") +
geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
scale_fill_gradient2(
low = "blue", # Color for negative values
mid = "white", # Color at 0
high = "red"
+
) theme_bw() +
labs(
title = "Study 2 -- Time to Response: Differences in Hazard Ratios",
subtitle = "Site Investigator vs. Averaged Central Reviewer",
x = "Disease Progression Threshold",
y = "Partial Response Threshold",
fill = "Diff from\nActual"
)
B.6.0.16 thesis_code/simulate_thresholds/tr_data_study_one.R
library(tidyr)
if (!"rs_files_with_multiple_raters" %in% ls()){
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
}
if (!"tr_where_rs_files" %in% ls()){
<- readRDS("tr_where_rs_files.rds")
tr_where_rs_files
}
<- function(x) {
get_mode # If the vector has only one unique value, return it directly
if (length(unique(na.omit(x))) == 1) {
return(unique(na.omit(x)))
}
# Otherwise, calculate the mode
<- unique(x)
uniq_vals which.max(tabulate(match(x, uniq_vals)))]
uniq_vals[
}
<- function(x) {
cummin_na replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
# I need/want to find the New Lesion and Non-Target Lesion info for
# the Oncologist. Only Overall Response (sometimes listed Clinical Response)
# is available in the RS domain data
1]] %>%
rs_files_with_multiple_raters[[count(RSCAT, RSEVAL, RSEVALID, RSTEST, RSSTAT) %>%
View()
1]] %>%
rs_files_with_multiple_raters[[::filter(
dplyr== "" | (RSEVALID == "ONCOLOGIST" & RSSTAT == "NOT DONE")
RSEVALID != "CLINICAL ASSESSMENT"
, RSCAT %>%
) View()
1]] %>%
rs_files_with_multiple_raters[[::filter(
dplyr!= ""
RSEVALID %>%
) View()
list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted")
::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted/ts.xpt") %>%
havenView()
# None of the supp tables have useful info
# study_one_supp_tables <- list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted", full.names = T) %>%
# grep("supp", . , value = T) %>%
# map(haven::read_xpt)
#
# View(study_one_supp_tables[[17]])
<- list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted", full.names = T) %>%
study_one_nonsupp_tables grep(".xpt", . , value = T) %>%
grep("supp", . , value = T, invert = T) %>%
map(haven::read_xpt)
# This is just the TR Domain file
24]] %>% View()
study_one_nonsupp_tables[[
# Will probably need to take the TR file, filter for investigator NEW and NT
# data and then craft a data set with that which can then be joined to the
# data for the Readers
1]] %>%
tr_where_rs_files[[count(TREVAL, TREVALID, TRLNKGRP, TRORRES, ) %>%
::filter(TREVAL == "INVESTIGATOR") %>%
dplyrView()
######### GENERAL TR DATA
<- tr_where_rs_files[[1]] %>%
study_one_sld_data ::filter(
dplyr%in% c("SUMDIAM", "SAXIS")
TRTESTCD %>%
) mutate(
TRGRPID_trunc = str_extract(TRGRPID, "A[[:digit:]]{1,3}"),
TREVALID = if_else(TREVALID == "", TREVAL, TREVALID)
%>%
) group_by(USUBJID, TRGRPID_trunc) %>%
mutate(TRDY_sub = get_mode(TRDY)) %>%
ungroup() %>%
mutate(
TRSTRESC = case_when(
%in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
TRORRES TRUE ~ TRSTRESC
)%>%
) select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
arrange(USUBJID, TRDY_sub, TREVALID) %>%
::pivot_wider(
tidyrnames_from = TRTESTCD,
values_from = TRSTRESC,
values_fn = ~sum(as.numeric(.x))
%>%
) # select(USUBJID, TRTEST, TRORRES, SUMDIAM, TREVALID, TRDY_sub) %>%
mutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
%>%
) ::filter(!is.na(SUMDIAM)) %>%
dplyrmutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
SAXIS_num = as.numeric(SAXIS),
SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
%>%
) group_by(USUBJID, TREVALID) %>%
mutate(
baseline_value = first(SUMDIAM_num),
mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
nadir_cummin_value = cummin_na(SUMDIAM_num)
%>%
) mutate(
percent_change_from_nadir = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(SUMDIAM_num NA_real_
)%>%
) mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
(SUMDIAM_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
(SAXIS_num # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
-baseline_value)/baseline_value) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
(((SUMDIAM_num>= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup()
######### INVESTIGATOR/ONCOLOGIST
# NOT ASSESSABLE, ABSENT, PRESENT, UNEQUIVOCAL PROGRESSION
<- tr_where_rs_files[[1]] %>%
investigator_nontarget_new_tumors mutate(TREVALID = if_else(TREVALID == "", TREVAL, TREVALID)) %>%
::filter(
dplyr== "INVESTIGATOR",
TREVAL ::str_detect(TRLNKGRP, "^T-.*", negate = T),
stringr> 0
TRDY %>%
) select(USUBJID, TREVALID, TRLNKID, TRGRPID, TRORRES, TRDY) %>%
mutate(
tumor_label = case_when(
::str_detect(TRLNKID, "^NT.*") ~ "Non-target Response",
stringr::str_detect(TRLNKID, "^NEW.*") ~ "New Lesion Present"
stringr
),TRORRES = if_else(TRORRES == "BASELINE", "PRESENT", TRORRES)
%>%
) group_by(USUBJID, TRDY, tumor_label) %>%
mutate(
worst_response = case_when(
all(TRORRES == "ABSENT") ~ "ABSENT",
all(TRORRES == "NOT ASSESSABLE") ~ "NOT ASSESSABLE", # consider changing this to ALL instead of ANY--affects 3 records
any(TRORRES == "UNEQUIVOCAL PROGRESSION") ~ "UNEQUIVOCAL PROGRESSION",
all(TRORRES == "PRESENT") ~ "PRESENT",
all(TRORRES %in% c("PRESENT", "ABSENT")) ~ "PRESENT"
)%>%
) ungroup() %>%
distinct(USUBJID, TRDY, tumor_label, .keep_all = TRUE) %>%
select(USUBJID, TREVALID, RSTEST = tumor_label, RSORRES = worst_response, TRDY)
<- rs_files_with_multiple_raters[[1]] %>%
investigator_overall_response ::filter(
dplyr== "ONCOLOGIST",
RSEVALID != "NOT DONE",
RSSTAT != "DEATH",
RSORRES != "CLINRESP",
RSTESTCD > 0
RSDY %>%
) mutate(RSEVALID = "INVESTIGATOR") %>%
select(USUBJID, RSTEST, RSORRES, TREVALID = RSEVALID, TRDY= RSDY)
<- investigator_nontarget_new_tumors %>%
investigator_overall_nontarget_new_response bind_rows(investigator_overall_response) %>%
arrange(USUBJID, TRDY) %>%
pivot_wider(
values_from = RSORRES,
names_from = RSTEST
)
<- investigator_overall_nontarget_new_response %>%
study_one_investigator_recist_data left_join(
study_one_sld_data,by = c("USUBJID" = "USUBJID", "TREVALID" = "TREVALID", "TRDY" = "TRDY_sub")
%>%
) select(USUBJID:`New Lesion Present`, `Target Response` = target_response_calculated) %>%
mutate(
`New Lesion Present` = case_when(
`New Lesion Present` == "UNEQUIVOCAL PROGRESSION" ~ "Y",
is.na(`New Lesion Present`) ~ "N"
),`Non-target Response` = case_when(
`Non-target Response` == "ABSENT" ~ "COMPLETE RESPONSE (CR)",
`Non-target Response` == "UNEQUIVOCAL PROGRESSION" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PRESENT" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`Non-target Response` == "NOT ASSESSABLE" ~ "NOT ASSESSABLE"
)%>%
) rename(RSEVALID = TREVALID, RSDY = TRDY)
############# RATERS 1 AND 2
<- rs_files_with_multiple_raters[[1]] %>%
study_one_raters_recist_data ::filter(
dplyr%in% c("READER 1", "READER 2"),
RSEVALID > 0
RSDY %>%
) select(USUBJID, RSTEST, RSORRES, RSEVALID, RSDY) %>%
pivot_wider(
values_from = RSORRES,
names_from = RSTEST
)
# validate calculation of Target outcome in SLD data
# View(study_one_raters_recist_data %>%
# left_join(
# study_one_sld_data,
# by = c("USUBJID" = "USUBJID", "RSEVALID" = "TREVALID", "RSDY" = "TRDY_sub")
# ) %>%
# # select(-c(TRTEST:complete_response)) %>%
# mutate(
# target_check = `Target Response` == target_response_calculated
# ) %>%
# dplyr::filter(!target_check)
# )
# Only 8 discrepancies, and only on Complete vs. Partial response
### FINALLY COMBINE ALLLLL DATA
<- study_one_investigator_recist_data %>%
study_one_simulation_prepped_data bind_rows(study_one_raters_recist_data) %>%
arrange(USUBJID, RSDY, RSEVALID) %>%
left_join(
study_one_sld_data,by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
)
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
::plan(future::multicore, workers = 6)
future
<- furrr::future_pmap(all_combos, ~{
tr_study_one_simulation_data %>%
study_one_simulation_prepped_data mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
( >= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup() %>%
mutate(
target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
target_check = `Target Response` == target_response_calculated
%>%
) mutate(
overall_response_calculated = case_when(
`New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
== "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
target_response_calculated `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
`New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
)%>%
) select(matches("USUBJID"), RSEVALID, RSDY, `Overall Response`, `Target Response`, `Non-target Response`,
`New Lesion Present`, SUMDIAM_num:baseline_response,
%>%
target_response_calculated, overall_response_calculated) ::filter(!is.na(RSEVALID))
dplyr
})
saveRDS(tr_study_one_simulation_data, "tr_study_one_simulation_data.rds")
B.6.0.17 thesis_code/simulate_thresholds/tr_data_study_three.R
library(tidyr)
if (!"rs_files_with_multiple_raters" %in% ls()){
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
}
if (!"tr_where_rs_files" %in% ls()){
<- readRDS("tr_where_rs_files.rds")
tr_where_rs_files
}
<- function(x) {
get_mode # If the vector has only one unique value, return it directly
if (length(unique(na.omit(x))) == 1) {
return(unique(na.omit(x)))
}
# Otherwise, calculate the mode
<- unique(x)
uniq_vals which.max(tabulate(match(x, uniq_vals)))]
uniq_vals[
}
<- function(x) {
cummin_na replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
<- rs_files_with_multiple_raters[[3]] %>%
rs_study_three_ratings mutate(
RSEVALID = if_else(is.na(RSEVALID) | RSEVALID == "", RSEVAL, RSEVALID)
%>%
) arrange(USUBJID, RSDY, RSEVALID) %>%
::filter(
dplyr> 0,
RSDY == "RECIST 1.1",
RSCAT != "SCANUPL",
RSTESTCD != "20064700370000021" & RSDY != 165)
(USUBJID %>%
) select(USUBJID, RSDY, RSEVALID, RSTEST, RSORRES) %>%
mutate(
RSORRES = case_when(
== "New Lesion Progression" & RSORRES == "UNEQUIVOCAL" ~ "Y",
RSTEST == "New Lesion Progression" & RSORRES == "EQUIVOCAL" ~ "N",
RSTEST TRUE ~ RSORRES
),RSTEST = if_else(RSTEST == "New Lesion Progression", "New Lesion Present", RSTEST),
RSORRES = case_when(
== "CR" ~ "COMPLETE RESPONSE (CR)",
RSORRES == "PR" ~ "PARTIAL RESPONSE (PR)",
RSORRES == "SD" ~ "STABLE DISEASE (SD)",
RSORRES == "PD" ~ "PROGRESSIVE DISEASE (PD)",
RSORRES == "NE" ~ "NOT EVALUABLE",
RSORRES == "NON-CR/NON-PD" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
RSORRES TRUE ~ RSORRES
)%>%
) pivot_wider(
values_from = RSORRES,
names_from = RSTEST
%>%
) mutate(
`New Lesion Present` = if_else(is.na(`New Lesion Present`), "N", `New Lesion Present`)
%>%
) ::filter(
dplyr!is.na(`Overall Response`),
`Overall Response` != "BASELINE"
)# count(`Overall Response`, `Target Response`, `Non-target Response`, `New Lesion Present`) %>%
# View()
# All data for re-calculating Target Response for all raters appears to be
# available here, but the SUMLIDAM values do not directly have a date associated
# to them so primary key attached to a date needs to be found.
# It looks like the TRREFID variable can be used for this.
<- tr_where_rs_files[[3]] %>%
tr_study_data_three_target_tumors mutate(
TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
%>%
) ::filter(
dplyr== "RECIST 1.1",
TRCAT !str_detect(TRLNKGRP, "^NT.*|^NEW.*")
%>%
) group_by(USUBJID, TRREFID) %>%
mutate(
TRDY_sub = if_else(TREVALID %in% c("READER 1", "READER 2"), get_mode(TRDY), TRDY),
TRTESTCD = if_else(TRTESTCD == "", NA_character_, TRTESTCD)
%>%
) ungroup() %>%
::filter(
dplyr!TRTESTCD %in% c("TUMSTATE", "LDIAM", "ACNSD", "PCNSD"),
!is.na(TRDY_sub)
%>%
) ungroup() %>%
mutate(
TRSTRESC = case_when(
%in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
TRORRES TRUE ~ TRSTRESC
)%>%
) select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
arrange(USUBJID, TRDY_sub, TREVALID) %>%
::pivot_wider(
tidyrnames_from = TRTESTCD,
values_from = TRSTRESC,
values_fn = ~sum(as.numeric(.x))
%>%
) mutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
SAXIS_num = as.numeric(SAXIS),
SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
%>%
) ::filter(!is.na(SUMDIAM)) %>%
dplyrgroup_by(USUBJID, TREVALID) %>%
mutate(
baseline_value = first(SUMDIAM_num),
mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
nadir_cummin_value = cummin_na(SUMDIAM_num)
%>%
) mutate(
percent_change_from_nadir = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(SUMDIAM_num NA_real_
)%>%
) mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
(SUMDIAM_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
(SAXIS_num # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
-baseline_value)/baseline_value) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
(((SUMDIAM_num>= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup()
# View()
<- rs_study_three_ratings %>%
check_ids ::filter(
dplyr!is.na(`Target Response`)
%>%
) left_join(
tr_study_data_three_target_tumors,by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
%>%
) mutate(
target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
target_check = `Target Response` == target_response_calculated
%>%
) mutate(
overall_response = case_when(
`New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
`Target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
`New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
`New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT APPLICABLE" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
`New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`New Lesion Present` %in% c("N", "NOT EVALUABLE") & `Target Response` == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT EVALUABLE" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
)%>%
) mutate(
overall_check = `Overall Response` == overall_response
)
# check_ids %>%
# select(USUBJID, RSDY, target_response_calculated, RSEVALID) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
<- tr_where_rs_files[[3]] %>%
study_three_tr_data mutate(
TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
%>%
) ::filter(
dplyr== "RECIST 1.1",
TRCAT !str_detect(TRLNKGRP, "^NT.*|^NEW.*")
%>%
) group_by(USUBJID, TRREFID) %>%
mutate(
TRDY_sub = if_else(TREVALID %in% c("READER 1", "READER 2"), get_mode(TRDY), TRDY),
TRTESTCD = if_else(TRTESTCD == "", NA_character_, TRTESTCD)
%>%
) ungroup() %>%
::filter(
dplyr!TRTESTCD %in% c("TUMSTATE", "LDIAM", "ACNSD", "PCNSD"),
!is.na(TRDY_sub)
%>%
) ungroup() %>%
mutate(
TRSTRESC = case_when(
%in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
TRORRES TRUE ~ TRSTRESC
)%>%
) select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
arrange(USUBJID, TRDY_sub, TREVALID) %>%
::pivot_wider(
tidyrnames_from = TRTESTCD,
values_from = TRSTRESC,
values_fn = ~sum(as.numeric(.x))
%>%
) mutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
SAXIS_num = as.numeric(SAXIS),
SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
%>%
) ::filter(!is.na(SUMDIAM)) %>%
dplyrgroup_by(USUBJID, TREVALID) %>%
mutate(
baseline_value = first(SUMDIAM_num),
mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
nadir_cummin_value = cummin_na(SUMDIAM_num)
%>%
) mutate(
percent_change_from_nadir = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(SUMDIAM_num NA_real_
)%>%
) ungroup()
<- rs_study_three_ratings %>%
study_three_simulation_prepped_data left_join(
study_three_tr_data,by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
%>%
) group_by(USUBJID, RSEVALID)
::plan(future::multicore, workers = 6)
future
<- furrr::future_pmap(all_combos, ~{
tr_study_three_simulation_data %>%
study_three_simulation_prepped_data mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
(SUMDIAM_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
(SAXIS_num # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
-baseline_value)/baseline_value) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
(((SUMDIAM_num>= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup() %>%
mutate(
overall_response = case_when(
`New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
== "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
target_response_calculated `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
`New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
)
)
})
saveRDS(tr_study_three_simulation_data, "tr_study_three_simulation_data.rds")
# if (!"tr_study_three_simulation_data" %in% ls()){
# tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
#
#
#
# ################################ TARGET OUTCOME SIMULATION
#
#
# irr_simulation_target_outcome <- tr_study_three_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, target_response_calculated) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
# study_three_original_irr_id <- all_combos %>%
# mutate(
# id = row_number()
# ) %>%
# dplyr::filter(
# progression_threshold == 0.2,
# partial_response_threshold == 0.3
# ) %>%
# pull(id)
#
#
#
# study_three_original_irr_value_target <- irr_simulation_target_outcome[[study_three_original_irr_id]]$value
#
#
# all_combos %>%
# mutate(
# fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome, ~.x[["value"]]),
# fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_target,
# fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_target
# ) %>%
# ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
# geom_tile() +
# # scale_fill_distiller(palette = "RdBu") +
# geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
# geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
# scale_fill_gradient2(
# low = "blue", # Color for negative values
# mid = "white", # Color at 0
# high = "red", # Color for positive values
# midpoint = 0 # Ensures color transition at zero
# ) +
# theme_bw() +
# labs(
# title = "Effect of Varying Progression and Response Thresholds on IRR",
# subtitle = "RECIST 1.1 *Target* Response in Study 3"
# )
#
#
#
#
#
#
#
#
#
#
#
# ###################################### OVERALL OUTCOME SIMULATION
#
#
#
# irr_simulation_overall_outcome_study_three <- tr_study_three_simulation_data %>%
# map(~{
# .x %>%
# select(USUBJID, RSDY, RSEVALID, overall_response) %>%
# tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
# tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response) %>%
# select(-1) %>%
# irr::kappam.fleiss(., detail = T)
# })
#
#
#
#
#
# study_three_original_irr_value_overall <- irr_simulation_overall_outcome_study_three[[study_three_original_irr_id]]$value
#
#
# all_combos %>%
# mutate(
# fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_three, ~.x[["value"]]),
# fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_overall,
# fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_overall
# ) %>%
# ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
# geom_tile() +
# # scale_fill_distiller(palette = "RdBu") +
# geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
# geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
# scale_fill_gradient2(
# low = "blue", # Color for negative values
# mid = "white", # Color at 0
# high = "red", # Color for positive values
# midpoint = 0 # Ensures color transition at zero
# ) +
# theme_bw() +
# labs(
# title = "Effect of Varying Progression and Response Thresholds on IRR",
# subtitle = "RECIST 1.1 *Overall* Response in Study 3")
#
#
B.6.0.18 thesis_code/simulate_thresholds/tr_data_study_two.R
library(tidyr)
if (!"rs_files_with_multiple_raters" %in% ls()){
<- readRDS("rs_files_with_multiple_raters.rds")
rs_files_with_multiple_raters
}
if (!"tr_where_rs_files" %in% ls()){
<- readRDS("tr_where_rs_files.rds")
tr_where_rs_files
}
<- function(x) {
cummin_na replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}
<- rs_files_with_multiple_raters[[2]] %>%
rs_study_two_ratings mutate(
RSEVALID = if_else(is.na(RSEVALID) | RSEVALID == "", RSEVAL, RSEVALID)
%>%
) arrange(UNI_ID, RSDY, RSEVALID) %>%
::filter(
dplyr> 0,
RSDY == "RECIST 1.1",
RSCAT != "GLOBAL REVIEW",
RSGRPID != "RSALL"
RSTESTCD %>%
) select(UNI_ID, RSDY, RSEVALID, RSTEST, RSORRES) %>%
mutate(
RSORRES = case_when(
== "New Lesion Progression" & RSORRES == "UNEQUIVOCAL" ~ "Y",
RSTEST == "New Lesion Progression" & RSORRES == "EQUIVOCAL" ~ "N",
RSTEST TRUE ~ RSORRES
),RSTEST = if_else(RSTEST == "New Lesion Progression", "New Lesion Present", RSTEST),
RSORRES = case_when(
== "CR" ~ "COMPLETE RESPONSE (CR)",
RSORRES == "PR" ~ "PARTIAL RESPONSE (PR)",
RSORRES == "SD" ~ "STABLE DISEASE (SD)",
RSORRES == "PD" ~ "PROGRESSIVE DISEASE (PD)",
RSORRES == "NE" ~ "NOT EVALUABLE",
RSORRES == "NON-CR/NON-PD" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
RSORRES == "NA" ~ "NOT ASSESSABLE",
RSORRES TRUE ~ RSORRES
)%>%
) pivot_wider(
values_from = RSORRES,
names_from = RSTEST
%>%
) mutate(
`New Lesion Present` = if_else(is.na(`New Lesion Present`), "N", `New Lesion Present`)
)
# Sum of Longest Diameter info for Investigators does not seem to be available
# If correct, I cannot calculate new Target Outcome responses for Investigator
# ! The necessary info for the investigator appears to be available in the
# ADRS ADaM dataset:
# haven::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer//Study_20250320125552850/YO40245_4_Unconverted/adrs.xpt")
<- haven::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer//Study_20250320125552850/YO40245_4_Unconverted/adrs.xpt")
study_two_adrs_data
# Can only get investigator from the ADRS because the other readers are just
# lumped together as "Independent Review Forum"
# study_two_adrs_data %>%
# dplyr::filter(
# PARCAT2 == "Investigator",
# PARCAT3 == "Recist 1.1"
# ) %>%
# count(PARAM, PARAMCD, PARCAT2, PARCAT3) %>%
# View()
<- study_two_adrs_data %>%
study_two_sld_data_investigator ::filter(
dplyr%in% c("VISSLD"),
PARAMCD == "Investigator"
PARCAT2 %>%
) select(-c(REGIONDI:MVI_EHS2)) %>%
arrange(UNI_ID, ADY) %>%
select(UNI_ID, TREVALID = PARCAT2, TRDY = ADY, PARAM, AVALC) %>%
pivot_wider(
names_from = PARAM,
values_from = AVALC
%>%
) rename(TRSTRESC = 4) %>%
mutate(
TRSTRESN = as.numeric(TRSTRESC),
TREVALID = "INVESTIGATOR"
)
# Need to figure out why almost all UNI_ID/TRDY/TREVALID combinations have
# 2 SUMLDIAM measurements rather than just 1. They are often at least
# slightly different values, and I need to know how to choose the correct one.
# The current grouping by UNI_ID, TREVALID, TRDY, TRSPID shows that there is
# a connection between TRSPID and the RECIST vs. HCC rating. I should be able
# to create a sudo variable that I can then filter on to retain the RECIST data
# READER 1 AND READER 2 SLD INFO!!!
<- tr_where_rs_files[[2]] %>%
study_two_sld_data_radiologists mutate(
TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
%>%
) arrange(UNI_ID, TREVALID, TRDY, TRSPID) %>%
group_by(TRSPID) %>%
mutate(
sld_grouping = case_when(
any(str_detect(TRLNKID, "RECIST 1.1")) ~ "RECIST",
any(str_detect(TRLNKID, "HCC mRECIST")) ~ "HCC",
TRUE ~ "none"
)%>%
) ungroup() %>%
::filter(
dplyr== "SUMLDIAM",
TRTESTCD == "RECIST"
sld_grouping %>%
) select(UNI_ID, TREVALID, TRDY, TRSTRESC, TRSTRESN)
#
# study_two_sld_data_radiologists %>%
# group_by(UNI_ID, TREVALID) %>%
# mutate(
# visit_number = row_number()
# ) %>%
# ungroup() %>%
# select(-TRSTRESC, -TRSTRESN) %>%
# pivot_wider(
# names_from = TREVALID,
# values_from = visit_number
# ) %>%
# mutate(
# check = `RADIOLOGIST 1` == `RADIOLOGIST 2`
# ) %>%
# View()
# study_two_sld_data_radiologists %>%
# group_by(UNI_ID) %>%
# arrange(UNI_ID, TRDY) %>%
# mutate(fuzzy_date_group = cumsum(c(1, diff(TRDY)) > 4)) %>%
# ungroup() %>%
# select(-TRSTRESC, -TRSTRESN) %>%
# View()
# pivot_wider(
# names_from = TREVALID,
# values_from = fuzzy_date_group
# ) %>%
# mutate(
# check = `RADIOLOGIST 1` == `RADIOLOGIST 2`
# ) %>%
# View()
<- study_two_sld_data_investigator %>%
study_two_tr_data bind_rows(study_two_sld_data_radiologists) %>%
mutate(
TRDY_reduced = if_else(
== "f58b117eaf882d2d7524288703747835fef69913bfaf91972f465cd8ddb53386" & TRDY == -22) |
(UNI_ID == "a4fa042453ec49e54bca11bb04bd9d5447e6ce8567c4257719c8f33273b2567b" & TRDY == -20) |
(UNI_ID == "f29e9a84fa93f7644fd4d5578e605797f6978f03485ca8f089a265330bc49028" & TRDY == -27) |
(UNI_ID == "b5483e217e5468468097734c8e5d6c2dd8adb43d0548a3db740fb148a13176a8" & TRDY == -25) |
(UNI_ID == "b5483e217e5468468097734c8e5d6c2dd8adb43d0548a3db740fb148a13176a8" & TRDY == -14) |
(UNI_ID == "f493993091c878bdf73c5a860283ee8f4c9f011e4e5dfa40fa25b4a4ae749e2b" & TRDY == -14) |
(UNI_ID == "adf20c5d546f5e29de31b18c24ed22539bdb4e3ea5d1faf2182de155002e6e8b" & TRDY == -16),
(UNI_ID -5,
TRDY
)%>%
) group_by(UNI_ID) %>%
arrange(UNI_ID, TRDY_reduced) %>%
mutate(fuzzy_date_group = cumsum(c(1, diff(TRDY_reduced)) >= 9)) %>%
ungroup() %>%
group_by(UNI_ID) %>%
ungroup() %>%
select(-TRSTRESN)
<- rs_study_two_ratings %>%
study_two_joined_rs_tr_data ::fuzzy_right_join(
fuzzyjoin
study_two_tr_data,by = c("UNI_ID" = "UNI_ID", "RSEVALID" = "TREVALID", "RSDY" = "TRDY"),
match_fun = list(`==`, `==`, function(x,y) abs(x - y) <= 10)
)
# View(study_two_joined_rs_tr_data)
<- study_two_joined_rs_tr_data %>%
study_two_finalized_data mutate(
TRSTRESC = if_else(TRSTRESC == "NE", "NOT EVALUABLE", TRSTRESC)
%>%
) rename(
SUMDIAM = TRSTRESC
%>%
) mutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
%>%
) ::filter(!is.na(SUMDIAM)) %>%
dplyrgroup_by(UNI_ID.y, TREVALID) %>%
mutate(
baseline_value = first(SUMDIAM_num),
mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
nadir_cummin_value = cummin_na(SUMDIAM_num)
%>%
) mutate(
percent_change_from_nadir = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(SUMDIAM_num NA_real_
)%>%
) mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
( >= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup() %>%
mutate(
target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
target_check = `Target Response` == target_response_calculated
%>%
) mutate(
overall_response = case_when(
`New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
`Target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
`New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
`New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT APPLICABLE" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
`New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`New Lesion Present` %in% c("N", "NOT EVALUABLE") & `Target Response` == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT EVALUABLE" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
)
)
<- study_two_joined_rs_tr_data %>%
study_two_simulation_prepped_data mutate(
TRSTRESC = if_else(TRSTRESC == "NE", "NOT EVALUABLE", TRSTRESC)
%>%
) rename(
SUMDIAM = TRSTRESC
%>%
) mutate(
SUMDIAM_num = as.numeric(SUMDIAM),
SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
%>%
) ::filter(!is.na(SUMDIAM)) %>%
dplyrgroup_by(UNI_ID.y, TREVALID) %>%
mutate(
baseline_value = first(SUMDIAM_num),
mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
nadir_cummin_value = cummin_na(SUMDIAM_num)
%>%
) mutate(
percent_change_from_nadir = if_else(
!is.na(mm_change_from_previous),
- lag(nadir_cummin_value))/lag(nadir_cummin_value),
(SUMDIAM_num NA_real_
)
)
<- expand.grid(
all_combos progression_threshold = seq(0,1,0.01),
partial_response_threshold = seq(0,1,0.01)
)
?plan
::plan(future::multicore, workers = 6)
future
<- furrr::future_pmap(all_combos, ~{
tr_study_two_simulation_data %>%
study_two_simulation_prepped_data mutate(
baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
complete_response = SUMDIAM_num == 0
%>%
) mutate(
target_response_calculated = case_when(
# SUMNLNLD
== "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
SUMDIAM == 0 ~ "COMPLETE RESPONSE (CR)",
SUMDIAM_num round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
( >= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
((percent_change_from_nadir is.na(mm_change_from_previous) ~ NA_character_,
TRUE ~ "STABLE DISEASE (SD)"
)%>%
) ungroup() %>%
mutate(
target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
target_check = `Target Response` == target_response_calculated
%>%
) mutate(
overall_response_calculated = case_when(
`New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
`Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
== "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
target_response_calculated `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
`New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
`New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
`New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
`New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
`New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
)%>%
) select(matches("UNI_ID"), RSEVALID, RSDY, `Overall Response`, `Target Response`, `Non-target Response`,
`New Lesion Present`, SUMDIAM_num:baseline_response,
%>%
target_response_calculated, overall_response_calculated) ::filter(!is.na(RSEVALID))
dplyr
})
saveRDS(tr_study_two_simulation_data, "tr_study_two_simulation_data.rds")