Appendix B — Code

Authors

Affiliation

The code in this appendix is intended to be for proof of work. As such, many of the file paths in the appendix are relative to “missing” directories or have been redacted for privacy reasons, and original, patient-level data files are not included here. Estimates from simulations and some other components that are not dependent on the original data files are included in the data directory of the online repository.

B.1 Initial Code Files

B.1.1 Study Identification

B.1.1.1 thesis_code/identify_relevant_studies.R

 
library(dplyr)
library(magrittr)
library(purrr)
library(stringr)

studies_path <- "REDACTED"

studies_files <- list.files(
  studies_path,
  recursive = 2
)


grep("sld", studies_files, value = TRUE)
grep("ct", studies_files, value = TRUE)
grep("lesion", studies_files, value = TRUE)
grep("recist", studies_files, value = TRUE)
grep("tr.xpt", studies_files, value = TRUE)
grep("rs.xpt", studies_files, value = TRUE)
grep("adrs.xpt", studies_files, value = TRUE)
grep("^.*/rs.xpt$", studies_files, value = TRUE)






testing_file_loc <- "/Study_20250320143641493/GO29436_4_Unconverted"

list.files(
  paste0(studies_path, testing_file_loc),
  full.names = T
) %>%
  grep(".xpt", ., value = T) %>%
  set_names(., stringr::str_extract(., "(?<=\\/)[^\\/]+.xpt") ) %>%
  purrr::map(
    ~{
      haven::read_xpt(.x) %>%
        purrr::map_lgl(~any(stringr::str_detect(.x, "(STABLE|PROGRESSIVE|PARTIAL|COMPLETE)"))) %>%
        sum(na.rm = T)
    }
  )

#
# haven::read_xpt(
#   paste0(studies_path, testing_file_loc, "/priortx.xpt")
# ) %>%
#   # purrr::map_lgl(~any(stringr::str_detect(.x, "(STABLE|PROGRESSIVE|PARTIAL|COMPLETE)")))
#   View()







# Explore tr.xpt files ---------------------------------------



tr_files <- list.files(
  paste0(studies_path),
  full.names = T,
  recursive = T
) %>%
  grep("tr.xpt", ., value = T) %>%
  purrr::set_names()



all_tr_files <- tr_files %>%
  map(
    ~{
      haven::read_xpt(.x)
    }
  )


# TREVAL and TREVALID seem to be interesting vars to look at
treval_trevalid_studies <- all_tr_files %>%
  map(
    ~{
      cols_names <- names(.x)
      tibble(
        TREVAL = any(str_detect(cols_names, "TREVAL")),
        TREVALID = any(str_detect(cols_names, "TREVALID"))
      )
    }
  ) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_extract(id, "Study_.*")) %>%
  arrange(desc(TREVALID))



treval_trevalid_studies_data <- treval_trevalid_studies %>%
  dplyr::filter(TREVALID == T) %>%
  pull(id) %>%
  paste0(studies_path, "/", .) %>%
  set_names() %>%
  map(
    ~haven::read_xpt(.x)
  )


# View(treval_trevalid_studies_data[[1]])







# EXPLORE RS FILES ----------------------------------------------------------


# read in all of the available rs domain files

rs_file_paths <- list.files(
  studies_path,
  full.names = T,
  recursive = T
) %>%
  grep("^.*/rs.xpt$", ., value = T) %>%
  set_names()


all_rs_files <- rs_file_paths %>%
  map(
    ~haven::read_xpt(.x)
  )



# identify studies where it appears there are multiple raters
# i.e. filter for studies where multiple "things" can be found in either
# the RSEVAL or RSEVALID variables

# 5
# View(all_rs_files[[6]])

rseval_and_rsevalid_info <- all_rs_files %>%
  map(
    ~{
      rseval_vals <- if ("RSEVAL" %in% names(.x)) select(.x, RSEVAL) %>% pull() %>% unique()
      rsevalid_vals <- if ("RSEVALID" %in% names(.x)) select(.x, RSEVALID) %>% pull() %>% unique()
      return(list(rseval = rseval_vals, rsevalid = rsevalid_vals))
    }
  )


rs_file_paths_with_multiple_raters <- rseval_and_rsevalid_info %>%
  map_lgl(
    ~{
      length(.x[[1]]) > 1 | length(.x[[2]]) > 1
    }
  ) %>%
  which() %>%
  names()


# read in the studies identified to have multiple SLD image assessors

rs_files_with_multiple_raters <- rs_file_paths_with_multiple_raters %>%
  map(
    ~{
      haven::read_xpt(.x)
    }
  )


# View(rs_files_with_multiple_raters[[2]])


# count the number of unique participants

rs_files_with_multiple_raters %>%
  map_dbl(~{pull(.x, matches("USUBJID|UNI_ID")) %>% unique() %>% length()})


# count the number of unique scan or visit dates per participant

rs_files_with_multiple_raters %>%
  map_dbl(
    ~{
      group_var_sym <- sym(names(select(.x,matches("USUBJID|UNI_ID"))))
      .x %>%
        group_by(!!group_var_sym, RSDY) %>%
        summarize(count = n(), .groups = 'drop') %>%
        nrow()
      }
    )

B.1.1.2 thesis_code/identify_studies_with_rs_data.R

 
library(purrr)
library(magrittr)
library(dplyr)

studies_path <-"REDACTED"

rs_file_paths <- list.files(
  studies_path,
  full.names = T,
  recursive = T
) %>%
  grep("^.*/rs.xpt$", ., value = T)

rs_file_paths %<>%
  purrr::set_names()


all_rs_files <- rs_file_paths %>%
  map(
    ~haven::read_xpt(.x)
  )



# Manually check/confirm presence of useful RSEVAL and RSEVALID in the files

# if (interactive()) {
#   View(all_rs_files[[1]]) # confirmed no
#   View(all_rs_files[[2]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[3]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[4]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[5]]) # accepted.
#   View(all_rs_files[[6]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[7]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[8]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[9]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[10]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[11]]) # has investigator, oncologist 1, oncologist 2, and an adjudicator
#   View(all_rs_files[[12]]) # has RADIOLOGIST 1 and 2, but only 60 records and no 3rd rater
#   # `/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125523650/M16-289_17_Unconverted/rs.xpt`
#   View(all_rs_files[[13]]) # accepted. has investigator and radiologist 1/2
#   View(all_rs_files[[14]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[15]]) # accepted. has investigator and radiologist 1/2
#   View(all_rs_files[[16]]) # only investigator listed for RSEVAL
#   View(all_rs_files[[17]])
# }








# Programmatically complete the same process for identifying useful data

# identify studies where it appears there are multiple raters
# i.e. filter for studies where multiple "things" can be found in either
# the RSEVAL or RSEVALID variables


rseval_and_rsevalid_info <- all_rs_files %>%
  map(
    ~{
      rseval_vals <- if ("RSEVAL" %in% names(.x)) select(.x, RSEVAL) %>% pull() %>% unique() %>% setdiff("")
      rsevalid_vals <- if ("RSEVALID" %in% names(.x)) select(.x, RSEVALID) %>% pull() %>% unique() %>% setdiff("")
      return(list(rseval = rseval_vals, rsevalid = rsevalid_vals))
    }
  )


rs_file_paths_with_multiple_raters <- rseval_and_rsevalid_info %>%
  map_lgl(
    ~{
      length(.x[[1]]) > 1 | length(.x[[2]]) > 1
    }
  ) %>%
  which() %>%
  names() %>%
  # specifically remove study 12 as it has a limited number of records
  setdiff("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125523650/M16-289_17_Unconverted/rs.xpt") %>%
  # remove another study as it does not actually use RECIST guidelines
  setdiff("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125458298/EFC14335_3_Unconverted/rs.xpt") %>%
  purrr::set_names()


# read in the studies identified to have multiple SLD image assessors

rs_files_with_multiple_raters <- rs_file_paths_with_multiple_raters %>%
  map(
    ~{
      haven::read_xpt(.x)
    }
  )

rs_files_with_multiple_raters[[1]] %<>%
  mutate(RSEVALID = if_else(RSEVALID == "ONCOLOGIST", "SITE INVESTIGATOR", RSEVALID)) %>%
  mutate(
    RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
  )

rs_files_with_multiple_raters[[2]] %<>%
  mutate(RSEVALID = case_when(
    RSEVALID == "RADIOLOGIST 1" ~ "READER 1",
    RSEVALID == "RADIOLOGIST 2" ~ "READER 2",
    TRUE ~ RSEVALID
  )
) %>%
  mutate(
    RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
  ) %>%
  mutate(RSEVALID = if_else(RSEVALID == "INVESTIGATOR", "SITE INVESTIGATOR", RSEVALID))


rs_files_with_multiple_raters[[3]] %<>%
  mutate(
    RSEVALID = if_else(RSEVALID == "", RSEVAL, RSEVALID)
  ) %>%
  mutate(RSEVALID = if_else(RSEVALID == "INVESTIGATOR", "SITE INVESTIGATOR", RSEVALID))

saveRDS(rs_files_with_multiple_raters, "rs_files_with_multiple_raters.rds")

B.1.1.3 thesis_code/identify_tr_sumdiam_data.R

 



#### CHECK TR FILES WHERE RS DATA ALREADY IDENTIFIED

library(stringr)


tr_where_rs_files <- names(rs_file_paths_with_multiple_raters) %>%
  str_remove_all("/rs.xpt") %>%
  paste0("/tr.xpt") %>%
  map(
    ~haven::read_xpt(.x)
  )

saveRDS(tr_where_rs_files, "tr_where_rs_files.rds")

tr_where_rs_files

# easy enough
tr_where_rs_files[[1]] %>%
  # dplyr::filter(TRTESTCD == "SUMDIAM") %>%
  arrange(USUBJID, TRDY, TREVAL, TREVALID) %>%
  dplyr::filter(TRSTRESU == "mm") %>%
  View()


# what is going on here??
tr_where_rs_files[[2]] %>%
  # dplyr::filter(TRTESTCD =="SUMLDIAM") %>%
  # dplyr::filter(TREVAL == "INVESTIGATOR") %>%
  dplyr::filter(TRSTRESU == "mm") %>%
  arrange(UNI_ID, TRDY, TREVAL, TREVALID) %>%
  dplyr::filter(str_detect(UNI_ID, "08d7eb2980d3fcf841b2aaf41bc848faae5ea0c09429bd8408a35e5e0ecd8d2d")) %>%
  dplyr::filter(str_detect(TRLNKID, "^RECIST 1.1.*|^$|^INV.*") | is.na(TRLNKID)) %>%
  # count(TRTESTCD) %>%
  View()


# easy enough
tr_where_rs_files[[3]] %>%
  dplyr::filter(TRCAT == "RECIST 1.1") %>%
  dplyr::filter(TRTESTCD == "SUMDIAM") %>%
  arrange(USUBJID, TRDY, TREVAL, TREVALID) %>%
  View()


















### CHECK SUPPLEMENTAL FILES FOR TUMOR INFORMATION


# ADRS DOMAIN IF IT EXISTS
adrs_data <- names(rs_file_paths_with_multiple_raters) %>%
  str_remove_all("/rs.xpt") %>%
  paste0("/adrs.xpt") %>%
  map(
    ~try(haven::read_xpt(.x))
  )

adrs_data[[2]] %>%
  # dplyr::filter(PARAM %in% c("Sum of Longest Diameters by Visit", "Sum of Longest Diameters by Visit by IRF")) %>%
  arrange(UNI_ID, AVISITN) %>%
  View()





# TU DOMAIN IF IT EXISTS
# note that this domain does not have measurements of tumors
# but it does seem to clearly identify new, target, and non-target lesions


# tu_data <- names(rs_file_paths_with_multiple_raters) %>%
#   str_remove_all("/rs.xpt") %>%
#   paste0("/tu.xpt") %>%
#   map(
#     ~try(haven::read_xpt(.x))
#   )
#
#
# tu_data[[3]] %>% View()



# SUPPTR had no extra info of value

# supptr_data <- names(rs_file_paths_with_multiple_raters) %>%
#   str_remove_all("/rs.xpt") %>%
#   paste0("/supptr.xpt") %>%
#   map(
#     ~try(haven::read_xpt(.x))
#   )





all_xpts_one <- names(rs_file_paths_with_multiple_raters) %>%
  str_remove_all("/rs.xpt") %>%
  .[1] %>%
  list.files(full.names = T) %>%
  grep(".xpt", ., value = T) %>%
  set_names() %>%
  map(
    ~haven::read_xpt(.x)
  )


purrr::keep(all_xpts_one,
            ~{
              .x %>%
                purrr::map_lgl(~any(stringr::str_detect(.x, "mm"))) %>%
                sum(na.rm = T) %>%
                {. > 0}
            }
)

B.1.2 Data Preparation

B.1.2.1 thesis_code/clean_rs_domain_data.R

 
rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")


rs_data_cleaned_wide <- list()





rs_data_cleaned_wide$NCT02395172$first <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSSTRESC != "" & !is.na(RSSTRESC),
    !RSEVALID %in% c(""),
    !is.na(RSDY) & RSDY > 0
  ) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(USUBJID) %>% dplyr::filter(RSDY == min(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)



rs_data_cleaned_wide$NCT02395172$last <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSSTRESC != "" & !is.na(RSSTRESC),
    !RSEVALID %in% c(""),
    !is.na(RSDY) & RSDY > 0
  ) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(USUBJID) %>% dplyr::filter(RSDY == max(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)



rs_data_cleaned_wide$NCT02395172$overall <- rs_files_with_multiple_raters[["REDACTED/EMR 100070-004_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSSTRESC != "" & !is.na(RSSTRESC),
    !RSEVALID %in% c(""),
    !is.na(RSDY) & RSDY > 0
  ) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)













# Study_20250320125552850
rs_data_cleaned_wide$NCT03434379$first <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
  arrange(UNI_TRNC, RSDY, RSTEST) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(UNI_TRNC) %>% dplyr::filter(RSDY == min(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)




rs_data_cleaned_wide$NCT03434379$last <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
  arrange(UNI_TRNC, RSDY, RSTEST) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  arrange(UNI_TRNC, RSDY) %>%
  select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(UNI_TRNC) %>% dplyr::filter(RSDY == max(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)


rs_data_cleaned_wide$NCT03434379$overall <- rs_files_with_multiple_raters[["REDACTED/YO40245_4_Unconverted/rs.xpt"]] %>%
  arrange(UNI_TRNC, RSDY, RSTEST) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  arrange(UNI_TRNC, RSDY) %>%
  select(UNI_TRNC, RSDY, RSSTRESC, RSEVALID) %>%
  tidyr::unite("Unique_Observation", c("UNI_TRNC", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)















# this study has a define.xml file I could read-in to parse for more info
rs_data_cleaned_wide$NCT03631706$first <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSCAT == "RECIST 1.1"
  ) %>%
  # one instance of a duplicate--remove with distinct
  distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(USUBJID) %>% dplyr::filter(RSDY == min(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)



rs_data_cleaned_wide$NCT03631706$last <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSCAT == "RECIST 1.1"
  ) %>%
  # one instance of a duplicate--remove with distinct
  distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  group_by(USUBJID) %>% dplyr::filter(RSDY == max(RSDY)) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)



rs_data_cleaned_wide$NCT03631706$overall <- rs_files_with_multiple_raters[["REDACTED/MS200647_0037_10_Unconverted/rs.xpt"]] %>%
  arrange(USUBJID, RSDY) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSCAT == "RECIST 1.1"
  ) %>%
  # one instance of a duplicate--remove with distinct
  distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
  select(USUBJID, RSDY, RSSTRESC, RSEVALID) %>%
  tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
  tidyr::pivot_wider(names_from = RSEVALID, values_from = RSSTRESC)

B.1.2.2 thesis_code/clean_tr_domain_data.R

 
get_mode <- function(x) {
  # If the vector has only one unique value, return it directly
  if (length(unique(na.omit(x))) == 1) {
    return(unique(na.omit(x)))
  }

  # Otherwise, calculate the mode
  uniq_vals <- unique(x)
  uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
}


# tr_where_rs_files[[1]] %>%
#   dplyr::filter(TRTESTCD == "SUMDIAM") %>%
#   mutate(
#     TRLNKGRP_sub = str_remove(TRLNKGRP, "_E[:digit:]") ,
#     .after = TRLNKGRP
#   ) %>%
#   group_by(USUBJID, TRLNKGRP_sub) %>%
#   mutate(
#     TRDY_sub = get_mode(TRDY)
#   ) %>%
#   ungroup() %>%
#   dplyr::filter(!is.na(TRDY_sub)) %>%
#   mutate(
#     TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
#   ) %>%
#   group_by(USUBJID, TREVALID) %>%
#   dplyr::filter(!is.na(TRSTRESN)) %>%
#   mutate(
#     baseline_value = first(TRSTRESN),
#     mm_change_from_baseline = TRSTRESN - first(TRSTRESN),
#     mm_change_from_previous = TRSTRESN - lag(TRSTRESN),
#     nadir_absolute_value = min(TRSTRESN),
#     nadir_cummin_value = cummin(TRSTRESN)
#   ) %>%
#   select(USUBJID, TRLNKGRP, TRLNKGRP_sub, TRORRES, TRSTRESN, TREVAL, TREVALID, TRDY_sub, baseline_value, matches("mm_change"), matches("nadir")) %>%
#   mutate(
#     percent_response = if_else(
#       !is.na(mm_change_from_previous),
#       (TRSTRESN - lag(nadir_cummin_value))/lag(nadir_cummin_value),
#       NA_real_
#     )
#   ) %>%
#   mutate(
#     baseline_response = ((TRSTRESN-baseline_value)/baseline_value),
#     progressive_disease = (percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5),
#     partial_response = (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease,
#     complete_response = TRSTRESN == 0
#   ) %>%
#   mutate(
#     recist_response = case_when(
#       TRSTRESN == 0 ~ "CR",
#       ((percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5)) ~ "PD",
#       (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease ~ "PR",
#       is.na(mm_change_from_previous) ~ NA_character_,
#       TRUE ~ "SD"
#     )
#   ) %>%
#   group_by(USUBJID) %>%
#   dplyr::filter(any(TRSTRESN == 0)) %>%
#   arrange(USUBJID, TREVALID) %>%
#   View()










tr_study_data_one_target_tumors <- tr_where_rs_files[[1]] %>%
  dplyr::filter(TRTESTCD == "SUMDIAM") %>%
  mutate(
    TRLNKGRP_sub = str_remove(TRLNKGRP, "_E[:digit:]") ,
    .after = TRLNKGRP
  ) %>%
  group_by(USUBJID, TRLNKGRP_sub) %>%
  mutate(
    TRDY_sub = get_mode(TRDY)
  ) %>%
  ungroup() %>%
  dplyr::filter(!is.na(TRDY_sub)) %>%
  mutate(
    TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
  ) %>%
  group_by(USUBJID, TREVALID) %>%
  dplyr::filter(!is.na(TRSTRESN)) %>%
  mutate(
    baseline_value = first(TRSTRESN),
    mm_change_from_baseline = TRSTRESN - first(TRSTRESN),
    mm_change_from_previous = TRSTRESN - lag(TRSTRESN),
    nadir_absolute_value = min(TRSTRESN),
    nadir_cummin_value = cummin(TRSTRESN)
  ) %>%
  select(USUBJID, TRLNKID, TRLNKGRP, TRLNKGRP_sub, TRTESTCD, TRTEST, TRORRES, TRSTRESN, TREVAL, TREVALID, TRDY_sub, baseline_value, matches("mm_change"), matches("nadir")) %>%
  mutate(
    percent_response = if_else(
      !is.na(mm_change_from_previous),
      (TRSTRESN - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  )



tr_study_data_one_nontarget_new_tumors <- tr_where_rs_files[[1]] %>%
  dplyr::mutate(TREVALID  = if_else(is.na(TREVALID) | TREVALID  == "", TREVAL, TREVALID)) %>%
  dplyr::filter(
    TRTESTCD == "TUMSTATE",
    # str_detect(TRLNKID, "^NT.*|^NEW.*"),
    !is.na(TRDY), TRDY > 0,
    # TREVALID == "INVESTIGATOR"
  ) %>%
  group_by(USUBJID, TRDY, TREVALID) %>%
  summarize(
    new_lesion =
      case_when(
        any(TREVALID == "INVESTIGATOR" & str_detect(TRLNKID, "^NEW.*")) ~ "Yes",
        any(str_detect(TREVALID, "READER") & str_detect(TRLNKID, "^NEW.*") & str_detect(TRSTRESC, "^UNEQUIVOCAL PROGRESSION$|^UNEQUIVOCAL, PREVIOUSLY EQUIVOCAL$|^UNEQUIVOCAL, STILL PRESENT$|^NEW UNEQUIVOCAL$")) ~ "Yes",
        TRUE ~ "No"
      ),
    non_target_lesion_status =
      case_when(
        any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^DISAPPEARED$|^ABSENT$")) ~ "CR",
        any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^UNEQUIVOCAL PROGRESSION$")) ~ "PD",
        any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^STABLE$|^PROGRESSED$|^PRESENT$|^NON-PATHOLOGICAL NODE.*$|^DECREASED$")) ~ "Non-CR/Non-PD",
        any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^NOT EVALUABLE$")) ~ "NOT EVALUABLE",
        any(str_detect(TRLNKID, "^NT.*") & str_detect(TRSTRESC, "^NOT ASSESSABLE$")) ~ "NOT ASSESSABLE",
        any(str_detect(TRLNKID, "^NT.*")) ~ "NO STATUS",
        TRUE ~ NA_character_
      )
  ) %>%
  ungroup()





tr_study_data_one_target_tumors %>%
  mutate(
    baseline_response = ((TRSTRESN-baseline_value)/baseline_value),
    progressive_disease = (percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5),
    partial_response = (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease,
    complete_response = TRSTRESN == 0
  ) %>%
  mutate(
    recist_response = case_when(
      TRSTRESN == 0 ~ "CR",
      ((percent_response > 0.2) & ((TRSTRESN - nadir_cummin_value) > 5)) ~ "PD",
      (((TRSTRESN-baseline_value)/baseline_value) < -0.30) & (((TRSTRESN-baseline_value)/baseline_value) > -1) & (TRSTRESN != 0) & !progressive_disease ~ "PR",
      is.na(mm_change_from_previous) ~ NA_character_,
      TRUE ~ "SD"
    )
  ) %>%
  left_join(
    tr_study_data_one_nontarget_new_tumors,
    by = c("USUBJID" = "USUBJID", "TRDY_sub" = "TRDY", "TREVALID" = "TREVALID")
  ) %>%
  mutate(
    new_lesion = if_else(is.na(new_lesion), "No", new_lesion)
  ) %>%
  mutate(
    overall_response = case_when(
      recist_response == "PD" ~ "PD",
      new_lesion == "Yes" ~ "PD",
      new_lesion == "No" & recist_response == "SD" & (non_target_lesion_status %in% c("CR", "Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "SD",
      new_lesion == "No" & recist_response == "PR" & (non_target_lesion_status %in% c("CR", "Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "PR",
      new_lesion == "No" & recist_response == "CR" & (non_target_lesion_status %in% c("Non-CR/Non-PD") | is.na(non_target_lesion_status)) ~ "PR",
      new_lesion == "No" & recist_response == "CR" & (non_target_lesion_status %in% c("CR") | is.na(non_target_lesion_status)) ~ "CR",
    )
  )

B.2 IRR Analyses

B.2.0.1 thesis_code/irr_analyses/calculate_irr.R

 
source("clean_rs_domain_data.R")

library(purrr)
# Fleiss Kappa for studies where it's possible

rs_data_cleaned_wide %>%
  map(
    ~{
      map(.x, ~{.x %>%
        select(-1) %>%
        irr::kappam.fleiss(., detail = T)}
      )
    }
  ) %>%
  map(~map(.x, ~.x$value))




library(boot)
library(irr)

fleiss_kappa_func <- function(data, indices) {
  # Resample the data
  sample_data <- data[indices, ]
  # Calculate Fleiss' Kappa for the resampled data
  kappa <- kappam.fleiss(sample_data)$value
  return(kappa)
}

# Bootstrapping

boostrapped_fleiss_kappas <- map(rs_data_cleaned_wide, ~{map(.x,
    ~{
      set.seed(123) # For reproducibility
      results <- boot(data = select(.x,-1), statistic = fleiss_kappa_func, R = 100)
      results
    }
  )
  }
)

# str(boostrapped_fleiss_kappas[[1]]$first)
#
# boostrapped_fleiss_kappas[[1]]$first$data %>%
#   boostrapped_fleiss_kappas[[1]]$first$statistic()
#
#
# sd(boostrapped_fleiss_kappas[[1]]$first$t)


fleiss_kappa_estimates_ses <- map(boostrapped_fleiss_kappas, ~{
  map(.x, ~{
    Estimate <- .x$statistic(.x$data)
    SE <- sd(.x$t)
    return(tibble(Estimate = Estimate, SE = SE))
  })
})

B.2.0.2 thesis_code/irr_analyses/calculate_pairwise_rater_irr.R

 
# cohens_kappa_example <- rs_data_cleaned_wide$NCT02395172$overall %>%
#   select(2,3) %>%
#   irr::kappa2()

library(boot)

cohens_kappa_func <- function(data, indices) {
  # Resample the data
  sample_data <- data[indices, ]
  # Calculate Fleiss' Kappa for the resampled data
  kappa <- irr::kappa2(sample_data)$value
  return(kappa)
}



# Create pairwise data sets for the raters
cohens_kappa_data <- list()

cohens_kappa_data$reader1_reader2 <- rs_data_cleaned_wide %>%
  map(~map(.x, ~select(.x, -1) %>% select(matches("READER|RADIOLOGIST"))))


cohens_kappa_data$investigator_reader2 <- rs_data_cleaned_wide %>%
  map(~map(.x, ~select(.x, -1)
           %>% select(matches("(INVESTIGATOR|ONCOLOGIST)"), matches("(READER 1|RADIOLOGIST 1)"))
           )
      )

cohens_kappa_data$investigator_reader1 <- rs_data_cleaned_wide %>%
  map(~map(.x, ~select(.x, -1)
           %>% select(matches("(INVESTIGATOR|ONCOLOGIST)"), matches("(READER 2|RADIOLOGIST 2)"))
          )
      )






# Calculate Cohen's kappa for all pairwise ratings
cohens_kappa_results <- list()

cohens_kappa_results$reader1_reader2 <- map(
  cohens_kappa_data$reader1_reader2,
  ~{map(.x,
         ~{
           set.seed(123) # For reproducibility
           results <- boot(data = .x, statistic = cohens_kappa_func, R = 10)
           results
         }
        )
    }
)


cohens_kappa_results$investigator_reader2 <- map(
  cohens_kappa_data$investigator_reader2,
  ~{map(.x,
        ~{
          set.seed(123) # For reproducibility
          results <- boot(data = .x, statistic = cohens_kappa_func, R = 10)
          results
        }
  )
  }
)


cohens_kappa_results$investigator_reader1 <- map(
  cohens_kappa_data$investigator_reader1,
  ~{map(.x,
        ~{
          set.seed(123) # For reproducibility
          results <- boot(data = .x, statistic = cohens_kappa_func, R = 10)
          results
        }
  )
  }
)










# Create cross-tabulations for all data frames

# Calculate Cohen's kappa for all pairwise ratings
pairwise_tabulations <- list()

pairwise_tabulations$reader1_reader2 <- map(
  cohens_kappa_data$reader1_reader2,
  ~{map(.x,
        ~{xtabs(data = .x)}
  )
  }
)


pairwise_tabulations$investigator_reader2 <- map(
  cohens_kappa_data$investigator_reader2,
  ~{map(.x,
        ~{xtabs(data = .x)}
  )
  }
)


pairwise_tabulations$investigator_reader1 <- map(
  cohens_kappa_data$investigator_reader1,
  ~{map(.x,
        ~{xtabs(data = .x)}
  )
  }
)



cohens_kappa_data$investigator_reader1$NCT02395172$overall %>%
  irr::kappa2(.)



pairwise_cohens <- map(cohens_kappa_data, ~{
  map(.x, ~{
   # browser()
    temp <- .x[["overall"]] %>%
      mutate(across(everything(), ~factor(.x, levels = c('NON-CR/NON-PD', 'CR', 'PD', 'PR', 'NE', 'SD')))) %>%
      table() %>%
      psych::cohen.kappa()
    out <- paste0(round(temp$kappa, 3), " [" , round(temp$confid[1],3), ", ", round(temp$confid[5],3), "], n=", temp$n.obs)
    print(out)
    out
  })
})

# temp_result <- cohens_kappa_data$investigator_reader2$NCT03434379$overall %>%
#   irr::kappa2()
#
#
# temp_result <- cohens_kappa_data$investigator_reader2$NCT03434379$overall %>%
#   mutate(across(everything(), ~factor(.x, levels = c('NON-CR/NON-PD', 'CR', 'PD', 'PR', 'NE', 'SD')))) %>%
#   table() %>%
#   psych::cohen.kappa()



# pairwise_cohens %>%
#   bind_rows(.id = "Pairs") %>%
#   mutate(Average = (NCT02395172 + NCT03434379 + NCT03631706)/3,
#          Pairs = case_when(
#            Pairs == "reader1_reader2" ~ "Central Reviewer 1 vs. 2",
#            Pairs == "investigator_reader1" ~ "Site Investigator vs. Central Reviewer 1",
#            Pairs == "investigator_reader2" ~ "Site Investigator vs. Central Reviewer 2"
#          )
#          ) %>%
#   gt::gt() %>%
#   gt::tab_header(title = glue::glue("Pairwise Cohen's Kappa values for Clinical Trials")) %>%
#   gt::tab_footnote(
#     footnote = "Surprisingly, the average agreement for the central reviewers is lower than the agreement between site investigators and central reviwers. However, there is quite a bit of variability within studies, as the central reviewers of NCT02395172 exhibted substantially lower overall agreement than the site investigators vs. central review."
#   )


saveRDS(pairwise_cohens, "transfer/pairwise_cohens_summarized.rds")

B.2.0.3 thesis_code/irr_analyses/cochrane_diagram.R

 
lit_search <- list()
lit_search$file_path <- "irr_analyses/Metaanalysis Literature Search.xlsx"

lit_search$sheet_names <- readxl::excel_sheets(
  lit_search$file_path
)


lit_search$data <- map(
  set_names(lit_search$sheet_names),
  ~readxl::read_excel(lit_search$file_path, sheet = .x)
)



lit_search$exclusion_strings$compare_to_or_using_other_frameworks_string <- c(
  "C omparing RECIST to another framework",
  "Comparing CT and PET scan concordance",
  "Comparing RECIST 1.1 with RECIST 1.0",
  "Comparing RECIST to Choi and Tumor Attenuation criteria",
  "Comparing RECIST to EURAMOS",
  "Comparing RECIST to another framewoek",
  "Comparing RECIST to other frameworks",
  "Comparing RECIST to volumetric approach",
  "Comparing RECIST with EORTC and PERCIST",
  "Comparing RECIST with PNS",
  "Comparing RECIST with histological findings",
  "Comparing RECIST with iRECIST",
  "Comparing RECIST with iRECIST and mRECIST",
  "Comparing RECIST with other metrics",
  "Comparing RECIST with other modalities",
  "Comparing RECIST with serum 5-hiaa levels",
  "Comparing RECIST with volumetric measure",
  "Comparing imaging modalities",
  "Comparing other metrics",
  "Comparing radiography combined with other modalities",
  "Evaluating RECIST vs. Choi criteria",
  "evaluating reliability of computer-aided approach"
)

lit_search$exclusion_strings$relevant_but_no_irr <- c(
  "Did not compare the raters on needed outcome",
  "Comparing agreement over time?",
  "Did not do an IRR",
  "No interobserver evaluation done",
  "Not comparing raters",
  "Would be possible based on study structure, but data/information to calculate not provided"
)

lit_search$exclusion_strings$measured_sld_variability_but_not_recist_outcome <- c(
  "Evaluating measurement differences, but not outcome differences",
  "Evaluating measuring technique, not ordinal outcome of RECIST",
  "Has an ICC analysis of tumors, but does not look at RECIST ordinal outcomes",
  "Looked at measurements but not outcomes even though interrater analyses were made",
  "No assessment of ordinal outcome",
  "Not looking at the ordinal outcomes of RECIST",
  "Only evaluation of raters was with ICC"
)

lit_search$exclusion_strings$non_human_study <- c(
  "Evaluation in canine",
  "Canine study",
  "Is about dogs?"
)

lit_search$exclusion_strings$not_evaluating_recist <- c(
  "Doesn't seem to be evaluating RECIST",
  "Evaluates mRECIST, not RECIST 1.1",
  "Evaluating PERCIST",
  "Evaluating mRECIST",
  "Evaluating a model they built with train/test data based on mRECIST criteria",
  "Measuring volume",
  "not looking at RECIST",
  "Not assessing RECIST outcomes and was measuring ICC only",
  "Not evaluating RECIST",
  "Not evaluating RECIST 1.1",
  "Not evaluating reviewers on RECIST",
  "Not measuring ordinal outcomes and using non-standard imaging technique for tumors",
  "Not using RECIST, but rather several derivatives",
  "Used mRECIST and no Cohen/Fleiss kappa",
  "Uses mRECIST"

)

lit_search$exclusion_strings$irrelevant_topic <- c(
  "Irrelevant topic",
  "No relevant analysis",
  "Non relevant analyses",
  "Not evaluating anything relevant to me",
  "Not relevant",
  "Not relevant at all",
  "Not relevant to RECIST",
  "Not eligible"
)

lit_search$exclusion_strings$paywalled_article <- c(
  "No access; paywall",
  "Paywalled article",
  "Can't access article to evaluate further, and probably can't convert Light's kappa to Fleiss'"
)

lit_search$exclusion_strings$wrong_irr_stat_or_cis_not_given <- c(
  "Probably can't use this statistic",
  "Cis not available, but there is a table of the 3 observers' response classifications so I think I could reverse engineer this to at least create an estimated bootstrap CI. Need to create permuted data sets until there are several options that converge to k=0.7558, and then bootstrap these for CIs, and then average the CIs",
  "Not enough info provided (SE, data) to use this",
  "Cites another study with relevant data,by Liu…Wang"
)










lit_search$data[1:3] %>%
  bind_rows(.id = "Worksheet") %>%
  arrange(Relevant, Notes) %>%
  mutate(
    prisma_status = case_when(
      Relevant == "Duplicate" ~ "Duplicate",
      Notes %in% lit_search$exclusion_strings$compare_to_or_using_other_frameworks_string ~ "Comparing RECIST to other frameworks or metrics",
      Notes %in% lit_search$exclusion_strings$relevant_but_no_irr ~ "No Interrater Reliability Analysis",
      Notes %in% lit_search$exclusion_strings$measured_sld_variability_but_not_recist_outcome ~ "Interrater Reliability Analysis of non-RECIST scale",
      Notes %in% lit_search$exclusion_strings$non_human_study ~ "Non-human Study",
      Notes %in% lit_search$exclusion_strings$not_evaluating_recist ~ "Not evaluing RECIST",
      Notes %in% lit_search$exclusion_strings$irrelevant_topic ~ "Irrelevant topic",
      Notes %in% lit_search$exclusion_strings$paywalled_article ~ "Paywalled article",
      Notes %in% lit_search$exclusion_strings$wrong_irr_stat_or_cis_not_given ~ "Insufficient statistics provided",
      Relevant == "Yes" & !is.na(Statistic) & stringr::str_detect(Statistic, "Fleiss") ~ "Fleiss' Kappa",
      Relevant == "Yes" & !is.na(Statistic) & stringr::str_detect(Statistic, "Cohen") ~ "Cohen's Kappa"
    )
  ) %>%
  mutate(Worksheet = if_else(Worksheet %in% c("PubMed1", "PubMed2"), "PubMed", Worksheet)) %>%
  count(Worksheet, prisma_status) %>%
  View()




# setdiff(
#   pull(lit_search$data[[4]], PMID),
#   pull(lit_search$data[5:6] %>% bind_rows(), PMID)
# )










library(DiagrammeR)


grViz("
digraph prisma {
  graph [layout = dot, rankdir = TB]

  node [shape = box, style = filled, fillcolor = lightgrey, fontname = Helvetica]

  A [label = 'Records identified through database searching\\n(n = 131)']
  B [label = 'Records after duplicates removed\\n(n = 115)']
  C [label = 'Records screened\\n(n = 115)']
  D [label = 'Records excluded\\n(n = 22)']
  DD [label = 'Irrelevant Topic\\n(n = 15)']
  DDD [label = 'Paywalled article\\n(n = 4)']
  DDDD [label = 'Non-humann study\\n(n = 3)']
  E [label = 'Full-text articles assessed for eligibility\\n(n = 93)']
  F [label = 'Full-text articles excluded\\n(n = 82)']
  G [label = 'Studies included in\nquantitative synthesis (meta-analysis)\\n(n = 11)']
  H [label = 'Cohens Kappa\\n(n = 5)']
  I [label = 'Fleiss Kappa\\n(n = 6)']
  FF [label = 'Comparing RECIST to other\nframeworks or metrics\\n(n = 36)']
  FFF [label = 'Not evaluing\nRECIST\\n(n = 25)']
  FFFF [label = 'No Interrater\nReliability Analysis\\n(n = 7)']
  FFFFF [label = 'Insufficient\nstatistics\\n(n = 4)']

  A -> B -> C
  C -> D
  D -> DD
  D -> DDD
  D -> DDDD
  C -> E
  E -> F
  E -> G
  G -> H
  G -> I
  F -> FF
  F -> FFF
  F -> FFFF
  F -> FFFFF
}
")

B.2.0.4 thesis_code/irr_analyses/irr_exploration.R

 

# Categorical data: Cohen's kappa for 2 raters, and Fleiss' kappa for >2 raters
# Continuous data: intra-class correlation (ICC) as this calculation includes a variance component

library(irr)

# Example data: 10 items assessed by 3 raters
# Let's assume the 4 possible outcomes are coded as 1, 2, 3, and 4
ratings <- matrix(c(0, 0, 3, 0,  # All raters chose category 3 for the first item
                    0, 2, 1, 0,  # Two raters chose category 2, one chose category 3 for the second item
                    0, 0, 0, 3,  # All raters chose category 4 for the third item
                    1, 1, 1, 0,  # Each category was chosen once for the fourth item
                    2, 1, 0, 0,  # ... and so on for the rest of the items
                    0, 0, 2, 1,
                    0, 3, 0, 0,
                    3, 0, 0, 0,
                    1, 0, 0, 2,
                    0, 1, 0, 2),
                  nrow = 10, byrow = TRUE,
                  dimnames = list(NULL, c("Category1", "Category2", "Category3", "Category4")))



# Calculate Fleiss' Kappa for inter-rater reliability
kappa_results <- kappam.fleiss(as.table(ratings))

kappa_results


library(boot)

fleiss_kappa_func <- function(data, indices) {
  # Resample the data
  sample_data <- data[indices, ]
  # Calculate Fleiss' Kappa for the resampled data
  kappa <- kappam.fleiss(sample_data)$value
  return(kappa)
}

# Bootstrapping
set.seed(123) # For reproducibility
(results <- boot(data = ratings, statistic = fleiss_kappa_func, R = 10000))

se_kappa <- sd(results$t)

B.2.0.5 thesis_code/irr_analyses/irr_kappa_meta_analysis.R

 
library(magrittr)
library(dplyr)
library(metafor)
library(bayesmeta)


# NEED TO IMPORT THE DATA FILE WITH THE KAPPA VALUES

source("./irr_analyses/calculate_irr.R")
meta_analysis_data <- list()

meta_analysis_data$fleiss_literature <- tibble(
  Stat = "Fleiss",
  Study = c('PMID29633000','PMID30398433','PMID33012693','PMID33401109','PMID25488429','PMID39658684'),
  Author = c("Tovoli et al.", "Kuhl et al.", "Ghosn et al.", "Zimmermann et al.", "Oubel et al.", "El Homsi et al."),
  Estimate = c(0.741, 0.72, 0.77, 0.857, 0.72, 0.68),
  SE = c(0.054, 0.01785, 0.0969, 0.0768, 0.088, 0.0357)
)

meta_analysis_data$cohen_literature <- tibble(
  Stat = "Cohen",
  Study = c('PMID26982677','PMID27083205','PMID29934024','PMID29657622'),
  Author = c("Aghighi et al.", "Felsch et al.", "Karmakar et al.", "Ghobrial et al."),
  Estimate = c(0.82, 0.53, 0.51, 0.793),
  SE = c(0.0357, 0.0357, 0.0847, 0.1122)
)

meta_analysis_data$trials_overall <- tibble(
  Stat = "Fleiss",
  Study = names(fleiss_kappa_estimates_ses),
  Author = names(fleiss_kappa_estimates_ses)
) %>%
  bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["overall"]]) )

# meta_analysis_data$trials_first <- tibble(
#   Stat = "Fleiss",
#   Study = names(fleiss_kappa_estimates_ses)
# ) %>%
#   bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["first"]]) )
#
# meta_analysis_data$trials_last <- tibble(
#   Stat = "Fleiss",
#   Study = names(fleiss_kappa_estimates_ses)
# ) %>%
#   bind_cols(map_df(fleiss_kappa_estimates_ses, ~.x[["last"]]) )



# Step 1: Define the logit and inverse logit transformations for kappa
logit_kappa <- function(kappa) log((kappa + 1) / (1 - kappa))
inv_logit_kappa <- function(x) (exp(x) - 1) / (exp(x) + 1)

# Step 2: Apply the logit transformation and delta method for SE
meta_analysis_data_all <- meta_analysis_data %>%
  bind_rows() %>%
  mutate(
    logit_kappa = logit_kappa(Estimate),
    logit_se = SE * 2 / ((1 - Estimate^2) + 1e-8)  # delta method; small constant to ensure no division by zero
  )



###### OVERALL META ANALYSIS OPTIONS


## FREQUENTIST

# Conduct random-effects meta-analysis
rma_overall_freq <- rma(
  yi = logit_kappa,
  sei = logit_se,
  data = arrange(meta_analysis_data_all, Estimate),
  method = "REML",
  slab = Author
)
# Print summary of results
summary(rma_overall_freq, sei=logi_se)
# Create forest plot
forest(
  rma_overall_freq,
  atransf = inv_logit_kappa,
  main = "Overall Meta-Analysis",
  xlab = "Observed IRR",
  addpred = TRUE
)
funnel(
  rma_overall_freq,
  atransf = inv_logit_kappa,
  main = "Funnel Plot of Overall Meta-Analysis",
  xlab = "Observed IRR",
  addtau2 = T,
  label = T,
  legend = F
)



reg <- regtest(rma_overall_freq)
se = seq(0,1,length = 100)
lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")


## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log <- bayesmeta(y     = log(meta_analysis_data_all[["Estimate"]]),
#                                             sigma = meta_analysis_data_all[["SE"]],
#                                             label = meta_analysis_data_all[["Study"]],
#                                             mu.prior.mean = rma_overall_freq$b, mu.prior.sd= rma_overall_freq$se,
#                                             tau.prior= "I2")
#
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log, exponentiate = T)
# forest(bayes_meta_analysis_recist_log, atransf = exp)
# funnel(bayes_meta_analysis_recist_log)







########### FLEISS KAPPA ONLY

meta_analysis_data_fleiss <- meta_analysis_data_all %>%
  dplyr::filter(Stat == "Fleiss")

## FREQUENTIST

# Conduct random-effects meta-analysis
rma_fleiss_freq <- rma(
  yi = logit_kappa,
  sei = logit_se,
  data = arrange(meta_analysis_data_fleiss, Estimate),
  method = "REML",
  slab = Author
)

# Print summary of results
summary(rma_fleiss_freq)

forest(
  rma_fleiss_freq,
  atransf = inv_logit_kappa,
  main = "Fleiss' Kappa Meta-Analysis",
  xlab = "Observed IRR",
  addpred = TRUE
)
funnel(
  rma_fleiss_freq,
  atransf = inv_logit_kappa,
  main = "Funnel Plot of Fleiss' Kappa Meta-Analysis",
  xlab = "Observed IRR",
  addtau2 = T,
  label = T,
  legend = F
)

reg <- regtest(rma_fleiss_freq)
se = seq(0,1,length = 100)
lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")




## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log_fleiss <- bayesmeta(y     = log(meta_analysis_data_fleiss[["Estimate"]]),
#                                             sigma = meta_analysis_data_fleiss[["SE"]],
#                                             label = meta_analysis_data_fleiss[["Study"]],
#                                             mu.prior.mean = rma_fleiss_freq$b, mu.prior.sd = rma_fleiss_freq$se,
#                                             tau.prior= "I2")
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log_fleiss, exponentiate = T)
# forest(bayes_meta_analysis_recist_log_fleiss, atransf = exp)
# funnel(bayes_meta_analysis_recist_log_fleiss)




########### COHENS KAPPA ONLY

meta_analysis_data_cohen <- meta_analysis_data_all %>%
  dplyr::filter(Stat == "Cohen")

# Conduct random-effects meta-analysis
rma_cohen_freq <- rma(
  yi = logit_kappa,
  sei = logit_se,
  data = arrange(meta_analysis_data_cohen, Estimate),
  method = "REML",
  slab = Author
)

# Print summary of results
summary(rma_cohen_freq)

forest(
  rma_cohen_freq,
  atransf = inv_logit_kappa,
  main = "Cohen's Kappa Meta-Analysis",
  xlab = "Observed IRR",
  addpred = TRUE
)
funnel(
  rma_cohen_freq,
  atransf = inv_logit_kappa,
  main = "Funnel Plot of Cohen's Kappa Meta-Analysis",
  xlab = "Observed IRR",
  addtau2 = T,
  label = T,
  legend = F
)

reg <- regtest(rma_cohen_freq)
se = seq(0,1,length = 100)
lines((coef(reg$fit)[1] + coef(reg$fit)[2]*se), se, lwd=3, lty=3, col = "red")


## BAYESMETA APPROACH
# should come up with more clear statement on how priors are selected here
# taupriordensity <- function(t){dhalfnormal(t, scale=sqrt(0.1))}
# bayes_meta_analysis_recist_log_cohen <- bayesmeta(y     = log(meta_analysis_data_cohen[["Estimate"]]),
#                                             sigma = meta_analysis_data_cohen[["SE"]],
#                                             label = meta_analysis_data_cohen[["Study"]],
#                                             mu.prior.mean = rma_cohen_freq$b, mu.prior.sd = rma_cohen_freq$se,
#                                             tau.prior = "I2")
#
#
# # forest(bayes_meta_analysis_recist_log, atransf = exp)
#
# forestplot(bayes_meta_analysis_recist_log_cohen, exponentiate = T)
# forest(bayes_meta_analysis_recist_log_cohen, atransf = exp)
# funnel(bayes_meta_analysis_recist_log_cohen)

B.3 Overall Agreement Analyses

B.3.0.1 thesis_code/chisq_overall/chisq_overall.R

 



chisq_overall_data <- list()

if (!"rs_files_with_multiple_raters" %in% ls()){
  rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")
}


chisq_overall_data$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
  arrange(USUBJID, RSDY) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSSTRESC != "" & !is.na(RSSTRESC),
    !RSEVALID %in% c(""),
    !is.na(RSDY) & RSDY > 0
  )


chisq_overall_data$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
  arrange(UNI_TRNC, RSDY, RSTEST) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  )


chisq_overall_data$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
  arrange(USUBJID, RSDY) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSCAT == "RECIST 1.1"
  ) %>%
  # one instance of a duplicate--remove with distinct
  distinct(USUBJID, RSEVALID, RSDY, .keep_all = T) %>%
  dplyr::filter(RSSTRESC != "BASELINE")




chisq_test_overall_results <- imap(chisq_overall_data, ~{
  .x %>%
    select(RSEVALID, RSSTRESC) %>%
    mutate(
      RSSTRESC = toupper(RSSTRESC),
      RSSTRESC = factor(RSSTRESC, levels = c("CR", "PR", "SD", "PD", "NE", "NON-CR/NON-PD"))
    ) %>%
    table() %>%
    chisq.test()
})


imap(chisq_test_overall_results, ~{
  .x[["observed"]] %>%
    # addmargins() %>%
    as.data.frame.matrix(row.names = extract2(attr(., "dimnames"), 1) ) %>%
    rownames_to_column(var = "RSEVALID") %>%
    gt::gt(rowname_col = "RSEVALID") %>%
    gt::tab_header(title = glue::glue("{.y}: Contingency Table of Raters vs. RECIST Ratings")) %>%
    gt::tab_footnote(
      footnote = glue::glue("{.x[['method']]} yields a p-value of {round(.x[['p.value']], 4)} on {.x[['parameter']]} degrees of freedom")
    ) %>%
    gt::tab_spanner(
      label = "Evaluable",
      columns = c("CR", "PR", "SD", "PD")
    ) %>%
    gt::tab_spanner(
      label = "Not Evaluable",
      columns = c("NE", "NON-CR/NON-PD")
    )
})

B.3.0.2 thesis_code/chisq_overall/friedman_overall.R

 
# Just source in the data prep from the other file
source("chisq_overall/chisq_overall.R")

# temp_data <- chisq_overall_data$NCT02395172 %>%
#   select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
#   pivot_wider(names_from = RSEVALID, values_from = RSSTRESC) %>%
#   select(-1,-2) %>%
#   na.omit() %>%
#   # mutate(across(everything(), ~as.numeric(as.factor(.x)))) %>%
#   as.matrix()
#
# friedman.test(temp_data)



map(chisq_overall_data, ~{
  .x %>%
    select(matches("USUBJID|UNI_ID"), RSDY, RSEVALID, RSSTRESC) %>%
    pivot_wider(names_from = RSEVALID, values_from = RSSTRESC) %>%
    select(-1,-2) %>%
    na.omit() %>%
    as.matrix() %>%
    friedman.test()
})

B.3.0.3 thesis_code/chisq_overall/lme_overall.R

 

# Just source in the data prep from the other file
source("chisq_overall/chisq_overall.R")

library(lme4)


# temp_data <- mutate(
#   chisq_overall_data$NCT03631706,
#   RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
# )
#
# temp_model <- lmer(
#   as.numeric(as.factor(RSSTRESC)) ~ RSEVALID + (1 | USUBJID), data = temp_data
# )
#
# emmeans::emmeans(
#   temp_model,
#   pairwise ~ RSEVALID
# )

overall_outcomes_lme <- map(chisq_overall_data, ~{
  .x %<>%
    rename("USUBJID" = matches("USUBJID|UNI_ID")) %>%
    dplyr::filter(RSSTRESC != "NE") %>%
    mutate(
      RSSTRESC = factor(RSSTRESC, levels = c("PD","NON-CR/NON-PD","SD","PR","CR"))
    )

  temp_data <- mutate(
    .x,
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )

  temp_model <- lmer(
    as.numeric(as.factor(RSSTRESC)) ~ RSEVALID + (1 | USUBJID), data = temp_data
  )

  emmeans_model <- emmeans::emmeans(
    temp_model,
    pairwise ~ RSEVALID
  )

  return(list(
    mixed_model = temp_model,
    emmeans = emmeans_model
  ))

})



lme_latex_equations <- overall_outcomes_lme %>%
  map(~{
    .x[["mixed_model"]] %>%
      equatiomatic::extract_eq(use_coefs = T)
  })


lme_contrasts_table_only <- overall_outcomes_lme %>%
  map(~{
    .x[["emmeans"]][["contrasts"]] %>%
      as.data.frame() %>%
      tibble() # strip summary_emm class and associated info
  })


saveRDS(
  list(lme_latex_equations = lme_latex_equations,
       lme_contrasts_table_only = lme_contrasts_table_only),
  file = "transfer/lme_overall_results.rds"
  )

B.4 Objective Response Rate Analyses

B.4.0.1 thesis_code/response_rate/objective_response_rate.R

 
# TO BE COMPLETED STILL


# Want to calculate the proportions by Rater for each study
# Then I want to do a Chi-squared test to see if there is a notable
# difference at all. If a difference is detected, then I could use the
# Marascuilo procedure which is a multiple comparison test used to determine
# which specific proprotions differ significantly from one another

objective_response_rate_data <- list()

if (!"rs_files_with_multiple_raters" %in% ls()){
  rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")
}

objective_response_rate_data$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
  arrange(USUBJID, RSDY) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSSTRESC != "" & !is.na(RSSTRESC),
    !RSEVALID %in% c(""),
    !is.na(RSDY) & RSDY > 0
  )



objective_response_rate_data$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
  arrange(UNI_TRNC, RSDY, RSTEST) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  )




objective_response_rate_data$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
  arrange(USUBJID, RSDY) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSCAT == "RECIST 1.1"
  ) %>%
  # one instance of a duplicate--remove with distinct
  distinct(USUBJID, RSEVALID, RSDY, .keep_all = T)





orr_chisq_summarized_tables <- map(objective_response_rate_data, ~{
  temp <- .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
    summarize(
      responder = any(RSSTRESC %in% c("CR", "PR"))
    ) %>%
    group_by(RSEVALID) %>%
    summarize(
      count_true = sum(responder),
      count_false = sum(responder == FALSE)
    )
})

orr_chisq_tests <- map(orr_chisq_summarized_tables, ~{
  temp <- as.matrix(select(.x, -1))
  dimnames(temp) <- list(unique(.x[["RSEVALID"]]), c("count_true", "count_false"))
  chisq.test(temp)
})


# saveRDS(objective_response_rate_data, "./presentation/objective_response_rate_data.rds")
orr_chisq_tests$NCT02395172

imap(orr_chisq_tests, ~{
    tibble(
      Study = .y,
      sample_size = sum(.x[["observed"]]),
      p_value = round(.x[["p.value"]], 4),
      test_statistic = round(.x[["statistic"]], 2),
      df = .x[["parameter"]]
    )
}) %>%
  bind_rows() %>%
  gt::gt() %>%
  gt::tab_header(title = glue::glue("Objective Response Rate: Comparing Raters using Chi-Squared Test")) %>%
  gt::tab_footnote(
    footnote = "No differences in the distribution of rating participants as having an objective response was detected in any of the 3 trials. Failing to reject the null hypothesis of no difference, however, does not necessarily imply equivalence. Later analyses will look at this in more detail."
  )

B.4.0.2 thesis_code/response_rate/orr_cochran_q.R

 

# Cochran's Q omnibus test
source("response_rate/objective_response_rate.R")

orr_cochrans_q <- map(objective_response_rate_data, ~{
  .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
    summarize(
      responder = any(RSSTRESC %in% c("CR", "PR"))
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = RSEVALID, values_from = responder) %>%
    select(-1) %>%
    as.matrix() %>%
    na.omit() %>%
    nonpar::cochrans.q()
})



# Post-hoc McNemar pairwise tests

orr_mcnemars_tests <- map(objective_response_rate_data, ~{
  data_prep <- .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
    summarize(
      responder = any(RSSTRESC %in% c("CR", "PR"))
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = RSEVALID, values_from = responder) %>%
    select(-1)

  reader1_reader2 <- data_prep %>%
    select("READER 1", "READER 2") %>%
    table()

  reader1_site <- data_prep %>%
    select("READER 1", "SITE INVESTIGATOR") %>%
    table()

  reader2_site <- data_prep %>%
    select("READER 2", "SITE INVESTIGATOR") %>%
    table()

  mcnemar_reader1_reader2 <- mcnemar.test(reader1_reader2)
  mcnemar_reader1_site <- mcnemar.test(reader1_site)
  mcnemar_reader2_site <- mcnemar.test(reader2_site)

  browser()

  p_vals <- c(
    reader1_reader2 = mcnemar_reader1_reader2$p.value,
    reader1_site = mcnemar_reader1_site$p.value,
    reader2_site = mcnemar_reader2_site$p.value
  )

  adjusted_p_vals <- p.adjust(p_vals, method = "bonferroni")

  return(list(
    adjusted_p_vals = adjusted_p_vals,
    two_way_tables = list(reader1_reader2 = reader1_reader2,
                          reader1_site = reader1_site,
                          reader2_site = reader2_site)
      )
    )
})


saveRDS(
  list(orr_cochrans_q = orr_cochrans_q,
       orr_mcnemars_tests = orr_mcnemars_tests
       ),
  file = "transfer/orr_cochrans_mcnemars.rds"
)

B.5 Survival Endpoint Analyses

B.5.0.1 thesis_code/survival_analyses/survival_data_duration_response.R

 
rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")


survival_data_duration_response <- list()



survival_data_duration_response$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSORRES != "",
    RSDY > 0,
    !is.na(RSEVALID), !RSEVALID == ""
  ) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
  mutate(
    first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
    first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
    end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(any(first_response == TRUE)) %>%
  dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
  mutate(
    time_interval = RSDY - lag(RSDY),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  ) %>%
  dplyr::filter(!is.na(time_interval))





survival_data_duration_response$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  arrange(UNI_ID, RSDY) %>%
  group_by(UNI_ID, RSEVALID) %>%
  select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
  mutate(
    first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
    first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
    end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(any(first_response == TRUE)) %>%
  dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
  mutate(
    time_interval = RSDY - lag(RSDY),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  ) %>%
  dplyr::filter(!is.na(time_interval))






survival_data_duration_response$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP"
  ) %>%
  arrange(USUBJID, RSDY) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
  mutate(
    first_response = (RSSTRESC %in% c("CR", "PR") & cumsum(RSSTRESC %in% c("CR", "PR")) == 1 ),
    first_disease_progression = (RSSTRESC %in% c("PD") & cumsum(RSSTRESC %in% c("PD")) == 1),
    end_date = first_disease_progression | (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(any(first_response == TRUE)) %>%
  dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
  mutate(
    time_interval = RSDY - lag(RSDY),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  ) %>%
  dplyr::filter(!is.na(time_interval))

B.5.0.2 thesis_code/survival_analyses/survival_data_time_progression.R

 
survival_data_progression <- list()

rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")


survival_data_progression$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSORRES != "",
    RSDY > 0,
    !is.na(RSEVALID), !RSEVALID == ""
  ) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC == "PD") |
      (RSSTRESC != "PD" & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(USUBJID, RSEVALID, RSDY) %>%
  mutate(
    general_status = if_else(RSSTRESC == "PD", 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )







survival_data_progression$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
  # mutate(
  #   RSEVALID = if_else(RSEVAL == "SITE INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
  #                      RSEVAL,
  #                      RSEVALID)
  # ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  arrange(UNI_ID, RSDY) %>%
  group_by(UNI_ID, RSEVALID) %>%
  select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC %in% c("PD")) |
      (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(UNI_ID, RSEVALID, RSDY) %>%
  mutate(
    general_status = if_else(RSSTRESC %in% c("PD"), 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )




survival_data_progression$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "SITE INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP"
  ) %>%
  arrange(USUBJID, RSDY) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC %in% c("PD")) |
      (all(!RSSTRESC %in% c("PD")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(USUBJID, RSEVALID, RSDY) %>%
  mutate(
    general_status = if_else(RSSTRESC %in% c("PD"), 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )

B.5.0.3 thesis_code/survival_analyses/survival_data_time_response.R

 
survival_data_response <- list()

rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")


survival_data_response$NCT02395172 <- rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSTESTCD == "OVRLRESP",
    RSORRES != "",
    RSDY > 0,
    !is.na(RSEVALID), !RSEVALID == ""
  ) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSORRES, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC %in% c("PR", "CR")) |
      (all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(USUBJID, RSEVALID, RSDY) %>%
  mutate(
    general_status = if_else(RSSTRESC %in% c("CR", "PR"), 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )





survival_data_response$NCT03434379 <- rs_files_with_multiple_raters[[2]] %>%
  arrange(UNI_ID, RSEVAL) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP",
    (RSGRPID %in% c("TIMEPOINT REVIEW", "") | is.na(RSGRPID))
  ) %>%
  group_by(UNI_ID, RSEVALID) %>%
  select(UNI_ID, RSDY, RSEVALID, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC %in% c("PR", "CR")) |
      (all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(UNI_ID, RSEVALID, RSDY) %>%
  mutate(
    general_status = if_else(RSSTRESC %in% c("PR", "CR"), 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )



survival_data_response$NCT03631706 <- rs_files_with_multiple_raters[[3]] %>%
  arrange(USUBJID, RSEVALID) %>%
  mutate(
    RSEVALID = if_else(RSEVAL == "INVESTIGATOR" & (is.na(RSEVALID) | RSEVALID == ""),
                       RSEVAL,
                       RSEVALID)
  ) %>%
  dplyr::filter(
    RSCAT == "RECIST 1.1",
    RSTESTCD == "OVRLRESP"
  ) %>%
  group_by(USUBJID, RSEVALID) %>%
  select(USUBJID, RSDY, RSEVALID, RSSTRESC) %>%
  dplyr::filter(
    (RSSTRESC %in% c("PR", "CR")) |
      (all(!RSSTRESC %in% c("PR", "CR")) & RSDY == max(RSDY))
  ) %>%
  dplyr::filter(RSDY == min(RSDY)) %>%
  arrange(USUBJID, RSEVALID) %>%
  mutate(
    general_status = if_else(RSSTRESC %in% c("PR", "CR"), 1, 0),
    RSEVALID = factor(RSEVALID, levels = c("SITE INVESTIGATOR", "READER 1", "READER 2"))
  )

B.5.0.4 thesis_code/survival_analyses/survival_duration_response_function.R

 survival_duration_response <- function(survival_data_duration_response, plot = TRUE) {
    imap(survival_data_duration_response, ~{

    data <- .x
    survival_progression_object <- survminer::surv_fit(Surv(RSDY, first_disease_progression) ~ RSEVALID, data = data)

    title <- paste0(.y, ": Duration of Response")
    legend_labels <- levels(.x$RSEVALID)

    if (plot) {
      survival_plot <- survminer::ggsurvplot(
        survival_progression_object,
        data = data,
        tables.theme = theme_cleantable(),
        risk.table = T,
        pval = TRUE,
        title = title,
        palette = c("#86251b", "#076d7e", "#6ad2e2"),
        legend.title = "Tumor Response Evaluator:",
        legend.labs = legend_labels,
        xlab = "Time (Days)",
        ggtheme = theme_bw() +
          theme(
            plot.title = element_text(size = 18, hjust = 0.5),
            legend.title = element_text(size = 13),
            legend.text = element_text(size = 13),
            axis.title = element_text(size = 14),
            axis.text = element_text(size = 11)
          )
      )
    } else {
      survival_plot <- NULL
    }

    surv_obj_overall <- Surv(time = .x[["RSDY"]], event = .x[["first_disease_progression"]] )

    log_rank_test <- survminer::pairwise_survdiff(
      Surv(time = RSDY, event = first_disease_progression) ~ RSEVALID,
      data = .x,
      p.adjust.method = "BH"
    )

    cox_model <- tryCatch(coxph(surv_obj_overall ~ RSEVALID, data = .x), error = function(e) NA)
    emmeans_result <- tryCatch(emmeans::emmeans(cox_model, pairwise ~ RSEVALID), error = function(e) NA)

    return(
      list(
        survival_plot = survival_plot,
        surv_obj_overall = surv_obj_overall,
        log_rank_test = log_rank_test,
        cox_model = cox_model,
        emmeans_result = emmeans_result
      )
    )


  })
}

B.5.0.5 thesis_code/survival_analyses/survival_duration_response.R

 


library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)

source("~/study_identification/survival_analyses/survival_data_duration_response.R")

survival_duration_response <- imap(survival_data_duration_response, ~{

  .x %<>%
    rename(USUBJID = matches("UNI_ID|USBUBJID"))

  data <- .x
  survival_progression_object <- survminer::surv_fit(Surv(time_interval, first_disease_progression) ~ RSEVALID, data = data)

  title <- paste0(.y, ": Duration of Response")
  legend_labels <- levels(.x$RSEVALID)

  survival_plot <- survminer::ggsurvplot(
    survival_progression_object,
    data = data,
    tables.theme = theme_cleantable(),
    risk.table = T,
    pval = TRUE,
    pval.method = TRUE,
    title = title,
    palette = c("#86251b", "#076d7e", "#6ad2e2"),
    legend.title = "Tumor Response Evaluator:",
    legend.labs = legend_labels,
    xlab = "Time (Days)",
    ggtheme = theme_bw() +
      theme(
        plot.title = element_text(size = 18, hjust = 0.5),
        legend.title = element_text(size = 13),
        legend.text = element_text(size = 13),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 11)
      )
  )


  surv_obj_overall <- Surv(time = .x[["time_interval"]], event = .x[["first_disease_progression"]] )


  log_rank_test <- survdiff(
    Surv(data$time_interval, data$first_disease_progression) ~ RSEVALID,
    data = data
    )

  pairwise_survdiff <- survminer::pairwise_survdiff(
    Surv(time = time_interval, event = first_disease_progression) ~ RSEVALID,
    data = .x,
    p.adjust.method = "BH"
  )

  cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
  emmeans_result <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)

  return(
    list(
      survival_progression_object = survival_progression_object,
      survival_plot = survival_plot,
      surv_obj_overall = surv_obj_overall,
      log_rank_test = log_rank_test,
      pairwise_survdiff = pairwise_survdiff,
      cox_model = cox_model,
      emmeans_result = emmeans_result
    )
  )


})


# saveRDS(survival_duration_response, "./presentation/survival_duration_response_object.rds")

# survival_duration_response



imap(survival_duration_response,
    ~{ggcoxzph(
      cox.zph(.x[["cox_model"]])
    )+
        labs(title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Response"))

      ggsave(
        paste0("./transfer/survival_images/", .y, "_dor_cox_zph.jpeg"),
        units = "px", width = 4160, height = 2880
      )
      }
)

imap(survival_duration_response, ~
      {ggsurvplot(
        .x[["survival_progression_object"]],
        fun = "cloglog", # Complementary log-log = log(-log(S(t)))
        palette = "Dark2",
        xlab = "Time (Days)",
        ylab = "log(-log(Survival))",
        ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
                                          legend.title = element_text(size = 15),
                                          plot.title = element_text(size = 16),
                                          axis.title = element_text(size = 14),
                                          axis.text = element_text(size = 12)
                                          )
      ) +
          labs(
            title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Response with log(-log(Survival)) check")
          )

        ggsave(
          paste0("./transfer/survival_images/", .y, "_dor_cox_cloglog.jpeg"),
          units = "px", width = 4160, height = 2880
        )
        }
)

map(survival_duration_response,
    ~.x[["log_rank_test"]]
)


imap(survival_duration_response, ~{
  .x[["survival_plot"]]

  # can't have combined plot and table saved with ggplot
  # ggsave(
  #   paste0("./transfer/survival_images/", .y, "_dor_kaplan_meier.jpeg"),
  #   units = "px", width = 4160, height = 2880
  # )
})

survival_duration_response_meta_data <- survival_duration_response %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
  ) %>%
  arrange(id) %>%
  dplyr::filter(
    str_detect(id, "ONCOLOGIST|INVESTIGATOR")
  )


saveRDS(survival_duration_response_meta_data, "./transfer/survival_dor_emmeans.rds")


# comparison_counts <- survival_data_duration_response %>%
#   map(
#     ~{
#       data <- rename(.x, USUBJID = matches("UNI_ID|USUBJID"))
#       distinct(data, USUBJID, RSEVALID) %>%
#         group_by(USUBJID) %>%
#         summarize(
#           investigator_reader1 = sum(any(RSEVALID == "SITE INVESTIGATOR") & any(RSEVALID == "READER 1")),
#           investigator_reader2 = sum(any(RSEVALID == "SITE INVESTIGATOR") & any(RSEVALID == "READER 2"))) %>%
#         ungroup() %>%
#         summarize(
#           investigator_reader1 = sum(investigator_reader1 ),
#           investigator_reader2 = sum(investigator_reader2)
#         )
#     }
#   )





survival_duration_response %>%
  map(~{
    .x[["cox_model"]][["n"]]
  })

rma_survival_duration_response <- rma(
  yi = estimate,
  sei = SE,
  data = survival_duration_response_meta_data,
  method = "REML",
  slab = id
)

# Print summary of results
summary(rma_survival_duration_response)
# Create forest plot
forest(rma_survival_duration_response,
       # atransf = exp,
       main = "Duration of Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR)",
       addpred = T)
funnel(rma_survival_duration_response,
       # atransf = exp,
       main = "Duration of Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR)")








# Define equivalence bounds
eq_lower <- 0.8
eq_upper <- 1.25
# Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))

# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
TOST_duration_of_response_result <- TOSTER::TOSTone.raw(
  m = rma_survival_duration_response$beta[1],
  mu = 0,
  sd = rma_survival_duration_response$se * sqrt(6),
  n = 6, # Placeholder, since we use raw values
  low_eqbound = log(eq_lower),
  high_eqbound = log(eq_upper),
  alpha = 0.05
)

print(TOST_duration_of_response_result)






#
# surv_obj_overall <- Surv(time = survival_data_duration_response[[3]]$RSDY, event = survival_data_duration_response[[3]]$first_disease_progression)
#
# survfit(
#   surv_obj_overall ~ RSEVALID,
#   data = survival_data_duration_response[[3]]
# ) %>%
#   survminer::ggsurvplot(risk.table = T, title = "Trial 3: Response Duration")
#
#
# cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = survival_data_duration_response[[3]])
#
# res <- survminer::pairwise_survdiff(
#   Surv(time = RSDY, event = first_disease_progression) ~ RSEVALID,
#   data = survival_data_duration_response[[3]],
#   p.adjust.method = "BH"
# )
#
# res
#
# emmeans::emmeans(cox_model, pairwise ~ RSEVALID)

B.5.0.6 thesis_code/survival_analyses/survival_time_progression_function.R

 survival_time_progression <- function(survival_data_progression, plot = TRUE) {
  imap(survival_data_progression, ~{

    data <- .x
    survival_progression_object <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)

    title <- paste0(.y, ": Progression")
    legend_labels <- levels(.x$RSEVALID)

    if (plot) {
      survival_plot <- survminer::ggsurvplot(
        survival_progression_object,
        data = data,
        tables.theme = theme_cleantable(),
        risk.table = T,
        pval = TRUE,
        title = title,
        palette = c("#86251b", "#076d7e", "#6ad2e2"),
        legend.title = "Tumor Response Evaluator:",
        # legend.labs = legend_labels,
        xlab = "Time (Days)",
        ggtheme = theme_bw() +
          theme(
            plot.title = element_text(size = 18, hjust = 0.5),
            legend.title = element_text(size = 13),
            legend.text = element_text(size = 13),
            axis.title = element_text(size = 14),
            axis.text = element_text(size = 11)
          )
      )
    } else {
      survival_plot <- NULL
    }

    surv_obj_overall <- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )

    log_rank_test <- survdiff(
      Surv(data$RSDY, data$general_status) ~ RSEVALID,
      data = data
    )

    pairwise_survdiff <- survminer::pairwise_survdiff(
      Surv(time = RSDY, event = general_status) ~ RSEVALID,
      data = .x,
      p.adjust.method = "BH"
    )

    cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = .x)
    emmeans_result <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)

    return(
      list(
        survival_progression_object = survival_progression_object,
        survival_plot = survival_plot,
        surv_obj_overall = surv_obj_overall,
        log_rank_test = log_rank_test,
        pairwise_survdiff = pairwise_survdiff,
        cox_model = cox_model,
        emmeans_result = emmeans_result
      )
    )


  })
}

B.5.0.7 thesis_code/survival_analyses/survival_time_progression.R

 

library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)

source("~/study_identification/survival_analyses/survival_data_time_progression.R")

survival_progression <- imap(survival_data_progression, ~{

  .x %<>%
    rename(USUBJID = matches("UNI_ID|USBUBJID"))

  data <- .x
  survival_progression_object <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)

  title <- paste0(.y, ": Progression")
  legend_labels <- levels(.x$RSEVALID)

  survival_plot <- survminer::ggsurvplot(
    survival_progression_object,
    data = data,
    tables.theme = theme_cleantable(),
    risk.table = T,
    pval = TRUE,
    pval.method = TRUE,
    title = title,
    palette = c("#86251b", "#076d7e", "#6ad2e2"),
    legend.title = "Tumor Response Evaluator:",
    legend.labs = legend_labels,
    xlab = "Time (Days)",
    ggtheme = theme_bw() +
      theme(
        plot.title = element_text(size = 18, hjust = 0.5),
        legend.title = element_text(size = 13),
        legend.text = element_text(size = 13),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 11)
      )
  )


  surv_obj_overall <- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )

  log_rank_test <- survdiff(
    Surv(data$RSDY, data$general_status) ~ RSEVALID,
    data = data
  )

  pairwise_survdiff <- survminer::pairwise_survdiff(
    Surv(time = RSDY, event = general_status) ~ RSEVALID,
    data = .x,
    p.adjust.method = "BH"
  )

  cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
  emmeans_result <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)

  return(
    list(
      survival_progression_object = survival_progression_object,
      survival_plot = survival_plot,
      surv_obj_overall = surv_obj_overall,
      log_rank_test = log_rank_test,
      pairwise_survdiff = pairwise_survdiff,
      cox_model = cox_model,
      emmeans_result = emmeans_result
    )
  )


})



# saveRDS(survival_progression, "./presentation/survival_progression_object.rds")



imap(survival_progression,
    ~{ggcoxzph(
      cox.zph(.x[["cox_model"]])
    ) +
      labs(title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Progression"))

    ggsave(
      paste0("./transfer/survival_images/", .y, "_progression_cox_zph.jpeg"),
      units = "px", width = 4160, height = 2880
    )
    }
)

imap(survival_progression, ~{
      temp <- ggsurvplot(
        .x[["survival_progression_object"]],
        fun = "cloglog", # Complementary log-log = log(-log(S(t)))
        palette = c("#86251b", "#076d7e", "#6ad2e2"),
        xlab = "Time",
        ylab = "log(-log(Survival))",
        ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
                                          legend.title = element_text(size = 15),
                                          plot.title = element_text(size = 16),
                                          axis.title = element_text(size = 14),
                                          axis.text = element_text(size = 12)
        )
      ) +
    labs(
      title = paste0(.y, ": Test of PH assumption for Cox regression of Time to Progression with log(-log(Survival)) check")
    )

  ggsave(
    paste0("./transfer/survival_images/", .y, "_progression_cox_cloglog.jpeg"),
    units = "px", width = 4160, height = 2880
  )

}
)

map(survival_progression,
    ~{
      .x[["log_rank_test"]]
      }
)


imap(survival_progression, ~{
  .x[["survival_plot"]]

  # can't have combined plot and table saved with ggplot
  # ggsave(
  #   paste0("./transfer/survival_images/", .y, "_progression_kaplan_meier.jpeg"),
  #   units = "px", width = 4160, height = 2880
  # )
})

survival_progression_meta_data <- survival_progression %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
         ) %>%
  arrange(id) %>%
  dplyr::filter(
    str_detect(id, "ONCOLOGIST|INVESTIGATOR")
  )


saveRDS(survival_progression_meta_data, "./transfer/survival_progression_emmeans.rds")


rma_survival_progression <- rma(
  yi = estimate,
  sei = SE,
  data = survival_progression_meta_data,
  method = "REML",
  slab = id
  )

# Print summary of results
summary(rma_survival_progression)
# Create forest plot
forest(rma_survival_progression,
       # atransf = exp,
       main = "Time to Progression: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference",
       addpred = T)
funnel(rma_survival_progression,
       # atransf = exp,
       main = "Time to Progression: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference")




# Define equivalence bounds
eq_lower <- 0.8
eq_upper <- 1.25
# Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))

# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
TOST_time_to_progression_result <- TOSTER::TOSTone.raw(
  m = (rma_survival_progression$beta[1]),
  mu = 0,
  sd = rma_survival_progression$se * sqrt(6),
  n = 6, # Placeholder, since we use raw values
  low_eqbound = log(eq_lower),
  high_eqbound = log(eq_upper),
  alpha = 0.05
)



TOST_time_to_progression_result

B.5.0.8 thesis_code/survival_analyses/survival_time_response_function.R

 survival_time_response <- function(survival_data_duration_response, plot = TRUE) {
    imap(survival_data_duration_response, ~{

    data <- .x
    survival_progression_object <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)

    title <- paste0(.y, ": Time to Response")
    legend_labels <- levels(.x$RSEVALID)

    if (plot) {
      survival_plot <- survminer::ggsurvplot(
        survival_progression_object,
        data = data,
        tables.theme = theme_cleantable(),
        risk.table = T,
        pval = TRUE,
        title = title,
        palette = c("#86251b", "#076d7e", "#6ad2e2"),
        legend.title = "Tumor Response Evaluator:",
        legend.labs = legend_labels,
        xlab = "Time (Days)",
        ggtheme = theme_bw() +
          theme(
            plot.title = element_text(size = 18, hjust = 0.5),
            legend.title = element_text(size = 13),
            legend.text = element_text(size = 13),
            axis.title = element_text(size = 14),
            axis.text = element_text(size = 11)
          )
      )
    } else {
      survival_plot <- NULL
    }

    surv_obj_overall <- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )

    log_rank_test <- survminer::pairwise_survdiff(
      Surv(time = RSDY, event = general_status) ~ RSEVALID,
      data = .x,
      p.adjust.method = "BH"
    )

    cox_model <- tryCatch(coxph(surv_obj_overall ~ RSEVALID, data = .x), error = function(e) NA)
    emmeans_result <- tryCatch(emmeans::emmeans(cox_model, pairwise ~ RSEVALID), error = function(e) NA)

    return(
      list(
        survival_plot = survival_plot,
        surv_obj_overall = surv_obj_overall,
        log_rank_test = log_rank_test,
        cox_model = cox_model,
        emmeans_result = emmeans_result
      )
    )


  })
}

B.5.0.9 thesis_code/survival_analyses/survival_time_response.R

 
library(survival)
library(stringr)
library(ggplot2)
library(survminer)
library(metafor)

source("~/study_identification/survival_analyses/survival_data_time_response.R")

survival_response <- imap(survival_data_response, ~{

  .x %<>%
    rename(USUBJID = matches("UNI_ID|USBUBJID"))

  data <- .x
  survival_progression_object <- survminer::surv_fit(Surv(RSDY, general_status) ~ RSEVALID, data = data)

  title <- paste0(.y, ": Response")
  legend_labels <- levels(.x$RSEVALID)

  survival_plot <- survminer::ggsurvplot(
    survival_progression_object,
    data = data,
    tables.theme = theme_cleantable(),
    risk.table = T,
    pval = TRUE,
    pval.method = TRUE,
    title = title,
    palette = c("#86251b", "#076d7e", "#6ad2e2"),
    legend.title = "Tumor Response Evaluator:",
    legend.labs = legend_labels,
    xlab = "Time (Days)",
    ggtheme = theme_bw() +
      theme(
        plot.title = element_text(size = 18, hjust = 0.5),
        legend.title = element_text(size = 13),
        legend.text = element_text(size = 13),
        axis.title = element_text(size = 14),
        axis.text = element_text(size = 11)
      )
  )


  surv_obj_overall <- Surv(time = .x[["RSDY"]], event = .x[["general_status"]] )


  log_rank_test <- survdiff(
    Surv(data$RSDY, data$general_status) ~ RSEVALID,
    data = data
  )

  pairwise_survdiff <- survminer::pairwise_survdiff(
    Surv(time = RSDY, event = general_status) ~ RSEVALID,
    data = .x,
    p.adjust.method = "BH"
  )

  cox_model <- coxph(surv_obj_overall ~ RSEVALID, data = .x, cluster = USUBJID)
  emmeans_result <- emmeans::emmeans(cox_model, pairwise ~ RSEVALID)

  return(
    list(
      survival_progression_object = survival_progression_object,
      survival_plot = survival_plot,
      surv_obj_overall = surv_obj_overall,
      log_rank_test = log_rank_test,
      pairwise_survdiff = pairwise_survdiff,
      cox_model = cox_model,
      emmeans_result = emmeans_result
    )
  )


})


# saveRDS(survival_response, "./presentation/survival_response_object.rds")

# survival_response


imap(survival_response,
    ~{ggcoxzph(
      cox.zph(.x[["cox_model"]])
    )+
        labs(title = paste0(.y, ": Test of proportional hazards assumption for Cox regression of Duration of Response"))

      ggsave(
        paste0("./transfer/survival_images/", .y, "_response_cox_zph.jpeg"),
        units = "px", width = 4160, height = 2880
      )
      }
)

imap(survival_response, ~
    {ggsurvplot(
      .x[["survival_progression_object"]],
      fun = "cloglog", # Complementary log-log = log(-log(S(t)))
      palette = "Dark2",
      xlab = "Time",
      ylab = "log(-log(Survival))",
      ggtheme = theme_minimal() + theme(legend.text = element_text(size = 12),
                                        legend.title = element_text(size = 15),
                                        plot.title = element_text(size = 16),
                                        axis.title = element_text(size = 14),
                                        axis.text = element_text(size = 12)
      )
    )+
        labs(
          title = paste0(.y, ": Test of PH assumption for Cox regression of Duration of Response with log(-log(Survival)) check")
        )

      ggsave(
        paste0("./transfer/survival_images/", .y, "_response_cox_cloglog.jpeg"),
        units = "px", width = 4160, height = 2880
      )
      }
)

map(survival_response,
    ~.x[["log_rank_test"]]
)


imap(survival_response, ~{
  .x[["survival_plot"]]

  # can't have combined plot and table saved with ggplot
  # ggsave(
  #   paste0("./transfer/survival_images/", .y, "_response_kaplan_meier.jpeg"),
  #   units = "px", width = 4160, height = 2880
  # )
})





survival_response_meta_data <- survival_response %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
  ) %>%
  arrange(id) %>%
  dplyr::filter(
    str_detect(id, "ONCOLOGIST|INVESTIGATOR")
  )

saveRDS(survival_response_meta_data, "./transfer/survival_response_emmeans.rds")



rma_survival_response <- rma(
  yi = estimate,
  sei = SE,
  data = survival_response_meta_data,
  method = "REML",
  slab = id
)

# Print summary of results
summary(rma_survival_response)
# Create forest plot
forest(rma_survival_response,
       # atransf = exp,
       main = "Time to Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference",
       addpred = T)
funnel(rma_survival_response,
       # atransf = exp,
       main = "Time to Response: Rater Differences in Hazard Ratio", xlab = "Observed (HR) Difference")




# Define equivalence bounds
eq_lower <- 0.8
eq_upper <- 1.25
# Estimate standard error from CI (approximate)
# se <- (ci_upper - ci_lower) / (2 * qnorm(0.975))

# Perform TOST equivalence test
# Since we are comparing to a reference HR of 1, we use m1 = log(hr), mu = 0
TOST_time_to_response_result <- TOSTER::TOSTone.raw(
  m = (rma_survival_response$beta[1]),
  mu = 0,
  sd = rma_survival_response$se * sqrt(6),
  n = 6, # Placeholder, since we use raw values
  low_eqbound = log(eq_lower),
  high_eqbound = log(eq_upper),
  alpha = 0.05
)

print(TOST_time_to_response_result)

B.5.0.10 thesis_code/survival_analyses/tost_plots.R

 

lower_bound = log(0.8)
upper_bound = log(1.25)

# Line segments from estimate to CI bounds
x_range <- upper_bound - lower_bound
x_min <- lower_bound - 0.25 * x_range
x_max <- upper_bound + 0.25 * x_range


tost_data <- map2(
  list(TOST_time_to_progression_result, TOST_time_to_response_result, TOST_duration_of_response_result),
  list("Time to Progression", "Time to Response", "Duration of Response"),
  ~{
    list(
      data.frame(
        estimate = .x[["diff"]],
        CI_lower = .x[["LL_CI_TOST"]],
        CI_upper = .x[["UL_CI_TOST"]],
        y= 0
      ),
      .y
    )
  })


plot_tost_results <- function(data, title) {
  ggplot(data, aes(x = estimate, y = y)) +
    # Line segments from estimate to CI bounds
    geom_segment(aes(x = estimate, xend = CI_lower, y = y, yend = y), size = 1.2) +
    geom_segment(aes(x = estimate, xend = CI_upper, y = y, yend = y), size = 1.2) +
    # Point for estimate
    geom_point(size = 4, shape = 21, fill = "black") +
    # Equivalence bounds (same color)
    geom_vline(xintercept = lower_bound, linetype = "dashed", color = "darkgreen", size = 1) +
    geom_vline(xintercept = upper_bound, linetype = "dashed", color = "darkgreen", size = 1) +
    # Aesthetics
    scale_y_continuous(NULL, breaks = NULL) +
    scale_x_continuous(limits = c(x_min, x_max)) +
    labs(x = "ln(Hazard Ratio) Difference",
         title = paste("Two One-Sided Tests:", title),
         subtitle = "90% Confidence Interval of Estimate"
         ) +
    theme_minimal() +
    theme(
      plot.title = element_text(hjust = 0.5),
      plot.subtitle = element_text(hjust = 0.5)
      )
}




map(tost_data, ~plot_tost_results(.x[[1]], .x[[2]]))

B.6 Sensitivity Analyses

B.6.0.1 thesis_code/simulate_thresholds/irr_sim_study_one.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_one_simulation_data" %in% ls()){
#   tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }
#
# ################################ TARGET OUTCOME SIMULATION
#
#
#
# irr_simulation_target_outcome_study_one <- tr_study_one_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, target_response_calculated ) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
# saveRDS(irr_simulation_target_outcome_study_one, "./simulate_thresholds/irr_simulation_target_outcome_study_one.rds")



if (!"irr_simulation_target_outcome_study_one" %in% ls()){
  irr_simulation_target_outcome_study_one <- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_one.rds")
}


study_one_original_irr_id <- all_combos %>%
  mutate(
    id = row_number()
  ) %>%
  dplyr::filter(
    progression_threshold == 0.2,
    partial_response_threshold == 0.3
  ) %>%
  pull(id)

study_one_original_irr_value_target <- irr_simulation_target_outcome_study_one[[study_one_original_irr_id]]$value


all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_one, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_one_original_irr_value_target,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_one_original_irr_value_target
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Target* Tumor Tumor Response in Study 1",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Kappa\nDeviation"
  )












###################################### OVERALL OUTCOME SIMULATION





# irr_simulation_overall_outcome_study_one <- tr_study_one_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, overall_response_calculated  ) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response_calculated ) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_one, "./simulate_thresholds/irr_simulation_overall_outcome_study_one.rds")



if (!"irr_simulation_overall_outcome_study_one" %in% ls()){
  irr_simulation_overall_outcome_study_one <- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_one.rds")
}



study_one_original_irr_value_overall <- irr_simulation_overall_outcome_study_one[[study_one_original_irr_id]]$value


all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_one, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_one_original_irr_value_overall,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_one_original_irr_value_overall
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Overall* Response in Study 1",
    # caption = "The intersecting black lines at x=0.20 and y=0.30 represent the actual RECIST threshold values (i.e. 20% disease progression and 30% response/decline in SLD). The values for the heatmap are thus the difference between the IRR value at the actual RECIST criteria values and the value calculated at the given combination of disease progression and disease response. This Overall Response plot also incorporates information from New Target and Non-Target Lesions as well as the recalculated Target Lesion values.",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Kappa\nDeviation"
  )

B.6.0.2 thesis_code/simulate_thresholds/irr_sim_study_three.R

 

all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_three_simulation_data" %in% ls()){
#   tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
#
study_three_simulation <- list()
#
#
# ################################ TARGET OUTCOME SIMULATION
#
#
# irr_simulation_target_outcome_study_three <- tr_study_three_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, target_response_calculated) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
# saveRDS(irr_simulation_target_outcome_study_three, "./simulate_thresholds/irr_simulation_target_outcome_study_three.rds")
#


if (!"irr_simulation_target_outcome_study_three" %in% ls()){
  irr_simulation_target_outcome_study_three <- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_three.rds")
}





study_three_original_irr_id <- all_combos %>%
  mutate(
    id = row_number()
  ) %>%
  dplyr::filter(
    progression_threshold == 0.2,
    partial_response_threshold == 0.3
  ) %>%
  pull(id)



study_three_original_irr_value_target <- irr_simulation_target_outcome_study_three[[study_three_original_irr_id]]$value


study_three_simulation$plot_target_outcome <- all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_three, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_target,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_target
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Target* Response in Study 3"
  )











###################################### OVERALL OUTCOME SIMULATION



# irr_simulation_overall_outcome_study_three <- tr_study_three_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, overall_response) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_three, "./simulate_thresholds/irr_simulation_overall_outcome_study_three.rds")



if (!"irr_simulation_overall_outcome_study_three" %in% ls()){
  irr_simulation_overall_outcome_study_three <- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_three.rds")
}





study_three_original_irr_value_overall <- irr_simulation_overall_outcome_study_three[[study_three_original_irr_id]]$value


study_three_simulation$plot_overall_outcome <- all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_three, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_overall,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_overall
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Overall* Response in Study 3")





study_three_simulation

# saveRDS(study_three_simulation, "./presentation/study_three_simulation.rds")

B.6.0.3 thesis_code/simulate_thresholds/irr_sim_study_two.R

 
all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_two_simulation_data" %in% ls()){
#   tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }
#
# ################################ TARGET OUTCOME SIMULATION
#
#
#
# irr_simulation_target_outcome_study_two <- tr_study_two_simulation_data %>%
#   map(~{
#     .x %>%
#       select(UNI_ID.x, RSDY, RSEVALID, target_response_calculated ) %>%
#       tidyr::unite("Unique_Observation", c("UNI_ID.x", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
# saveRDS(irr_simulation_target_outcome_study_two, "./simulate_thresholds/irr_simulation_target_outcome_study_two.rds")



if (!"irr_simulation_target_outcome_study_two" %in% ls()){
  irr_simulation_target_outcome_study_two <- readRDS("./simulate_thresholds/irr_simulation_target_outcome_study_two.rds")
}


study_two_original_irr_id <- all_combos %>%
  mutate(
    id = row_number()
  ) %>%
  dplyr::filter(
    progression_threshold == 0.2,
    partial_response_threshold == 0.3
  ) %>%
  pull(id)

study_two_original_irr_value_target <- irr_simulation_target_outcome_study_two[[study_two_original_irr_id]]$value


all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome_study_two, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_two_original_irr_value_target,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_two_original_irr_value_target
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Target* Tumor Tumor Response in Study 2"
  )












###################################### OVERALL OUTCOME SIMULATION




#
# irr_simulation_overall_outcome_study_two <- tr_study_two_simulation_data %>%
#   map(~{
#     .x %>%
#       select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated  ) %>%
#       tidyr::unite("Unique_Observation", c("UNI_ID.x", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response_calculated ) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
#
#
# saveRDS(irr_simulation_overall_outcome_study_two, "./simulate_thresholds/irr_simulation_overall_outcome_study_two.rds")



if (!"irr_simulation_overall_outcome_study_two" %in% ls()){
  irr_simulation_overall_outcome_study_two <- readRDS("./simulate_thresholds/irr_simulation_overall_outcome_study_two.rds")
}



study_two_original_irr_value_overall <- irr_simulation_overall_outcome_study_two[[study_two_original_irr_id]]$value


all_combos %>%
  mutate(
    fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_two, ~.x[["value"]]),
    fleiss_kappa_deviation = fleiss_kappa_simulation - study_two_original_irr_value_overall,
    fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_two_original_irr_value_overall
  ) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0   # Ensures color transition at zero
  ) +
  theme_bw() +
  labs(
    title = "Effect of Varying Progression and Response Thresholds on IRR",
    subtitle = "RECIST 1.1 *Overall* Response in Study 2",
    # caption = "The intersecting black lines at x=0.20 and y=0.30 represent the actual RECIST threshold values (i.e. 20% disease progression and 30% response/decline in SLD). The values for the heatmap are thus the difference between the IRR value at the actual RECIST criteria values and the value calculated at the given combination of disease progression and disease response. This Overall Response plot also incorporates information from New Target and Non-Target Lesions as well as the recalculated Target Lesion values."
  )

B.6.0.4 thesis_code/simulate_thresholds/orr_sim_study_one.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

if (!"tr_study_one_simulation_data" %in% ls()){
  tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
}


orr_study_one_sims <- map(tr_study_one_simulation_data, ~{
  temp <- .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
    summarize(
      responder = any(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = RSEVALID, values_from = responder) %>%
    select(-1) %>%
    as.matrix() %>%
    na.omit() %>%
    nonpar::cochrans.q()
  return(c(q = temp@TestStat, p = temp@PVal))
})

orr_study_one_sims %>%
  map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())

# saveRDS(orr_study_one_sims, "simulate_thresholds/orr_study_one_sims.rds")

if (!"orr_study_one_sims" %in% ls()) {
  orr_study_one_sims <- readRDS("simulate_thresholds/orr_study_one_sims.rds")
}


tibble(
  p_values = map_dbl(orr_study_one_sims, ~.x[["p"]] %>% str_remove("The p-value is  ") %>% as.numeric()),
  statistic = map_dbl(orr_study_one_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
) %>%
  bind_cols(all_combos) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0.05   # Ensures color transition at zero
  ) +
  labs(
    title = "NCT02395172: Chi-Squared Test p-values of Objective Response Rate",
    subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
    x = "Disease Progression Threshold",
    y = "Parial Response Threshold"
  ) +
  theme_bw()

B.6.0.5 thesis_code/simulate_thresholds/orr_sim_study_three.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

if (!"tr_study_three_simulation_data" %in% ls()){
  tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
}


orr_study_three_sims <- map(tr_study_three_simulation_data, ~{
  temp <- .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID"))), RSEVALID) %>%
    summarize(
      responder = any(overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = RSEVALID, values_from = responder) %>%
    select(-1) %>%
    as.matrix() %>%
    na.omit() %>%
    nonpar::cochrans.q()
  return(c(q = temp@TestStat, p = temp@PVal))
})

orr_study_three_sims %>%
  map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())


# saveRDS(orr_study_three_sims, "simulate_thresholds/orr_study_three_sims.rds")

if (!"orr_study_three_sims" %in% ls()) {
  orr_study_three_sims <- readRDS("simulate_thresholds/orr_study_three_sims.rds")
}


tibble(
  p_values = map_dbl(orr_study_three_sims, ~.x[["p"]] %>% str_remove("The p-value is  ") %>% as.numeric()),
  statistic = map_dbl(orr_study_three_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
) %>%
  bind_cols(all_combos) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0.05   # Ensures color transition at zero
  ) +
  labs(
    title = "NCT03631706: Chi-Squared Test p-values of Objective Response Rate",
    subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
    x = "Disease Progression Threshold",
    y = "Parial Response Threshold"
  ) +
  theme_bw()

B.6.0.6 thesis_code/simulate_thresholds/orr_sim_study_two.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

if (!"tr_study_two_simulation_data" %in% ls()){
  tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
}


orr_study_two_sims <- map(tr_study_two_simulation_data, ~{
  temp <- .x %>%
    group_by(across(any_of(c("USUBJID", "UNI_ID.x"))), RSEVALID) %>%
    summarize(
      responder = any(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"))
    ) %>%
    ungroup() %>%
    pivot_wider(names_from = RSEVALID, values_from = responder) %>%
    select(-1) %>%
    as.matrix() %>%
    na.omit() %>%
    nonpar::cochrans.q()
  return(c(q = temp@TestStat, p = temp@PVal))
})

orr_study_two_sims %>%
  map_dbl(~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric()) %>%
  sd()

# saveRDS(orr_study_two_sims, "simulate_thresholds/orr_study_two_sims.rds")

if (!"orr_study_two_sims" %in% ls()) {
  orr_study_two_sims <- readRDS("simulate_thresholds/orr_study_two_sims.rds")
}

tibble(
  p_values = map_dbl(orr_study_two_sims, ~.x[["p"]] %>% str_remove("The p-value is  ") %>% as.numeric()),
  statistic = map_dbl(orr_study_two_sims, ~.x[["q"]] %>% str_remove("Q = ") %>% as.numeric())
) %>%
  bind_cols(all_combos) %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = p_values)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red",  # Color for positive values
    midpoint = 0.05   # Ensures color transition at zero
  ) +
  labs(
    title = "NCT03434379: Chi-Squared Test p-values of Objective Response Rate",
    subtitle = "Comparison between Site Investigator, Central Reviewer 1, and Central Reviewer 2",
    x = "Disease Progression Threshold",
    y = "Parial Response Threshold"
  ) +
  theme_bw()

B.6.0.7 thesis_code/simulate_thresholds/survival_response_duration_sim_study_one.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_one_simulation_data" %in% ls()){
#   tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")

# study_one_resonse_duration_sim <- map(tr_study_one_simulation_data, ~{
#   .x %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response_calculated ) %>%
#     mutate(
#       first_response = (overall_response_calculated  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response_calculated  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
#       first_disease_progression = (overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
#       end_date = first_disease_progression | (all(!overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(any(first_response == TRUE)) %>%
#     dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
#     mutate(
#       time_interval = RSDY - dplyr::lag(RSDY)
#     ) %>%
#     dplyr::filter(!is.na(time_interval)) %>%
#     list() %>%
#     survival_duration_response(plot = F)
#   }
# )
#
#
#
#
# saveRDS(study_one_resonse_duration_sim, "simulate_thresholds/study_one_resonse_duration_sim.rds")

if (!"study_one_resonse_duration_sim" %in% ls()) {
  study_one_resonse_duration_sim <- readRDS("simulate_thresholds/study_one_resonse_duration_sim.rds")
}



study_one_resonse_duration_sim[[1]] %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
  ) %>%
  dplyr::filter()

coef(study_one_resonse_duration_sim[[100]][[1]]$cox_model)




study_one_resonse_duration_sim_coefs <- map_df(study_one_resonse_duration_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_one_actual_average_difference_dor <- study_one_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_one_resonse_duration_sim_coefs %<>%
  mutate(
    difference_from_actual = study_one_actual_average_difference_dor - average
  )


saveRDS(study_one_resonse_duration_sim_coefs,
        "./transfer/simulations/dor/study_one_dor_emmeans.rds"
)

study_one_resonse_duration_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 1 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )




#### FILTER FOR FEWER THAN 20 EVENTS

study_one_resonse_duration_sim_coefs_filtered <- map_df(study_one_resonse_duration_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_one_actual_average_difference_filtered_dor <- study_one_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_one_resonse_duration_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_one_actual_average_difference_filtered_dor - average
  )

saveRDS(study_one_resonse_duration_sim_coefs_filtered,
        "./transfer/simulations/dor/study_one_dor_filtered_emmeans.rds"
)


study_one_resonse_duration_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 1 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.8 thesis_code/simulate_thresholds/survival_response_duration_sim_study_three.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_three_simulation_data" %in% ls()){
#   tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")

# study_three_resonse_duration_sim <- map(tr_study_three_simulation_data, ~{
#   .x %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response ) %>%
#     mutate(
#       first_response = (overall_response  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
#       first_disease_progression = (overall_response  %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response  %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
#       end_date = first_disease_progression | (all(!overall_response  %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(any(first_response == TRUE)) %>%
#     dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
#     mutate(
#       time_interval = RSDY - dplyr::lag(RSDY)
#     ) %>%
#     dplyr::filter(!is.na(time_interval)) %>%
#     list() %>%
#     survival_duration_response(plot = F)
#   }
# )
#
#
#
#
# saveRDS(study_three_resonse_duration_sim, "simulate_thresholds/study_three_resonse_duration_sim.rds")

if (!"study_three_resonse_duration_sim" %in% ls()) {
  study_three_resonse_duration_sim <- readRDS("simulate_thresholds/study_three_resonse_duration_sim.rds")
}



study_three_resonse_duration_sim[[1]] %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
  ) %>%
  dplyr::filter()

coef(study_three_resonse_duration_sim[[100]][[1]]$cox_model)




study_three_resonse_duration_sim_coefs <- map_df(study_three_resonse_duration_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_three_actual_average_difference_dor <- study_three_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_three_resonse_duration_sim_coefs %<>%
  mutate(
    difference_from_actual = study_three_actual_average_difference_dor - average
  )


saveRDS(study_three_resonse_duration_sim_coefs,
        "./transfer/simulations/dor/study_three_dor_emmeans.rds"
)


study_three_resonse_duration_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 3 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )








#### FILTER FOR FEWER THAN 20 EVENTS

study_three_resonse_duration_sim_coefs_filtered <- map_df(study_three_resonse_duration_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_three_actual_average_difference_filtered_dor <- study_three_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_three_resonse_duration_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_three_actual_average_difference_filtered_dor - average
  )


saveRDS(study_three_resonse_duration_sim_coefs_filtered,
        "./transfer/simulations/dor/study_three_dor_filtered_emmeans.rds"
)



study_three_resonse_duration_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 3 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.9 thesis_code/simulate_thresholds/survival_response_duration_sim_study_two.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_two_simulation_data" %in% ls()){
#   tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_duration_response_function.R")

# study_two_resonse_duration_sim <- map(tr_study_two_simulation_data, ~{
#   .x %>%
#     group_by(UNI_ID.x, RSEVALID) %>%
#     select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated ) %>%
#     mutate(
#       first_response = (overall_response_calculated  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)") & cumsum(overall_response_calculated  %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) == 1 ),
#       first_disease_progression = (overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)") & cumsum(overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)")) == 1),
#       end_date = first_disease_progression | (all(!overall_response_calculated  %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(any(first_response == TRUE)) %>%
#     dplyr::filter(first_response == TRUE | end_date == TRUE) %>%
#     mutate(
#       time_interval = RSDY - dplyr::lag(RSDY)
#     ) %>%
#     dplyr::filter(!is.na(time_interval)) %>%
#     list() %>%
#     survival_duration_response(plot = F)
#   }
# )
#
#
#
#
# saveRDS(study_two_resonse_duration_sim, "simulate_thresholds/study_two_resonse_duration_sim.rds")

if (!"study_two_resonse_duration_sim" %in% ls()) {
  study_two_resonse_duration_sim <- readRDS("simulate_thresholds/study_two_resonse_duration_sim.rds")
}



study_two_resonse_duration_sim[[1]] %>%
  map(~map(.x["emmeans_result"], ~{
    as.data.frame(.x$contrasts)
  })) %>%
  unlist(recursive = FALSE) %>%
  list_rbind(names_to = "id") %>%
  mutate(id = str_remove(id, ".emmeans_result") %>%
           paste0(., ": ", contrast)
  ) %>%
  dplyr::filter()

coef(study_two_resonse_duration_sim[[100]][[1]]$cox_model)




study_two_resonse_duration_sim_coefs <- map_df(study_two_resonse_duration_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_two_actual_average_difference_dor <- study_two_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_two_resonse_duration_sim_coefs %<>%
  mutate(
    difference_from_actual = study_two_actual_average_difference_dor - average
  )


saveRDS(study_two_resonse_duration_sim_coefs,
        "./transfer/simulations/dor/study_two_dor_emmeans.rds"
)

study_two_resonse_duration_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 2 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )






#### FILTER FOR FEWER THAN 20 EVENTS


study_two_resonse_duration_sim_coefs_filtered <- map_df(study_two_resonse_duration_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_two_actual_average_difference_filtered_dor <- study_two_resonse_duration_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_two_resonse_duration_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_two_actual_average_difference_filtered_dor - average
  )


saveRDS(study_two_resonse_duration_sim_coefs_filtered,
        "./transfer/simulations/dor/study_two_dor_filtered_emmeans.rds"
)

# Note that only 17 events were observed for the actual RECIST guideline
# and calculations for the difference from there are based on the original value
# in the plot even though that space is greyed out in the filtered plot.


study_two_resonse_duration_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 2 -- Duration of Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.10 thesis_code/simulate_thresholds/survival_time_progression_sim_study_one.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_one_simulation_data" %in% ls()){
#   tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")
#
# study_one_progression_sim <- map(tr_study_one_simulation_data, ~{
#   .x %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response_calculated) %>%
#     dplyr::filter(
#       (overall_response_calculated == "PROGRESSIVE DISEASE (PD)") |
#         (overall_response_calculated != "PROGRESSIVE DISEASE (PD)" & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(USUBJID, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response_calculated == "PROGRESSIVE DISEASE (PD)", 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
#     ) %>%
#     list() %>%
#     survival_time_progression(plot = F)
# }
# )
#
#
#
# saveRDS(study_one_progression_sim, "simulate_thresholds/study_one_progression_sim.rds")

if (!"study_one_progression_sim" %in% ls()) {
  study_one_progression_sim <- readRDS("simulate_thresholds/study_one_progression_sim.rds")
}




study_one_progression_sim_coefs <- map_df(study_one_progression_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)

study_one_actual_average_difference_progression <- study_one_progression_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_one_progression_sim_coefs %<>%
  mutate(
    difference_from_actual = study_one_actual_average_difference - average
  )


saveRDS(study_one_progression_sim_coefs,
        "./transfer/simulations/progression/study_one_progression_emmeans.rds"
)

study_one_progression_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 1 -- Time to Progression: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.11 thesis_code/simulate_thresholds/survival_time_progression_sim_study_three.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_three_simulation_data" %in% ls()){
#   tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")

# study_three_progression_sim <- map(tr_study_three_simulation_data, ~{
#   .x %>%
#     arrange(USUBJID, RSDY) %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response) %>%
#     dplyr::filter(
#       (overall_response %in% c("PROGRESSIVE DISEASE (PD)")) |
#         (all(!overall_response %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(USUBJID, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response %in% c("PROGRESSIVE DISEASE (PD)"), 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
#     ) %>%
#     list() %>%
#     survival_time_progression(plot = F)
# }
# )



# saveRDS(study_three_progression_sim, "simulate_thresholds/study_three_progression_sim.rds")

if (!"study_three_progression_sim" %in% ls()) {
  study_three_progression_sim <- readRDS("simulate_thresholds/study_three_progression_sim.rds")
}




study_three_progression_sim_coefs <- map_df(study_three_progression_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)

study_three_actual_average_difference <- study_three_progression_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_three_progression_sim_coefs %<>%
  mutate(
    difference_from_actual = study_three_actual_average_difference - average
  )


saveRDS(study_three_progression_sim_coefs,
        "./transfer/simulations/progression/study_three_progression_emmeans.rds"
)


study_three_progression_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 3 -- Time to Progression: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.12 thesis_code/simulate_thresholds/survival_time_progression_sim_study_two.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_two_simulation_data" %in% ls()){
#   tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_progression_function.R")

# study_two_progression_sim <- map(tr_study_two_simulation_data, ~{
#   .x %>%
#     arrange(UNI_ID.x, RSDY) %>%
#     group_by(UNI_ID.x, RSEVALID) %>%
#     select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated) %>%
#     dplyr::filter(
#       (overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) |
#         (all(!overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(UNI_ID.x, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response_calculated %in% c("PROGRESSIVE DISEASE (PD)"), 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "RADIOLOGIST 1", "RADIOLOGIST 2"))
#     ) %>%
#     list() %>%
#     survival_time_progression(plot = F)
# }
# )



# saveRDS(study_two_progression_sim, "simulate_thresholds/study_two_progression_sim.rds")

if (!"study_two_progression_sim" %in% ls()) {
  study_two_progression_sim <- readRDS("simulate_thresholds/study_two_progression_sim.rds")
}




study_two_progression_sim_coefs <- map_df(study_two_progression_sim, ~{
  tryCatch(
    coef(.x[[1]][["cox_model"]]),
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)

study_two_actual_average_difference <- study_two_progression_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_two_progression_sim_coefs %<>%
  mutate(
    difference_from_actual = study_two_actual_average_difference - average
  )

saveRDS(study_two_progression_sim_coefs,
        "./transfer/simulations/progression/study_two_progression_emmeans.rds"
)


study_two_progression_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = difference_from_actual)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 2 -- Time to Progression: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.13 thesis_code/simulate_thresholds/survival_time_response_sim_study_one.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_one_simulation_data" %in% ls()){
#   tr_study_one_simulation_data <- readRDS("tr_study_one_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")

# study_one_time_response_sim <- map(tr_study_one_simulation_data, ~{
#   .x %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response_calculated ) %>%
#     dplyr::filter(
#       (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
#         (all(!overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(USUBJID, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
#     ) %>%
#     list()
#   }
# )
#
# study_one_time_response_sim %<>%
#   map(~survival_time_response(.x, plot = F))
#
# saveRDS(study_one_time_response_sim, "simulate_thresholds/study_one_time_response_sim.rds")

if (!"study_one_time_response_sim.rds" %in% ls()) {
  study_one_time_response_sim <- readRDS("simulate_thresholds/study_one_time_response_sim.rds")
}





study_one_time_response_sim_coefs <- map_df(study_one_time_response_sim, ~{
  tryCatch({
    coef(.x[[1]][["cox_model"]])
    },
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_one_actual_average_difference_response <- study_one_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_one_time_response_sim_coefs %<>%
  mutate(
    difference_from_actual = study_one_actual_average_difference_response - average
  )


saveRDS(study_one_time_response_sim_coefs,
        "./transfer/simulations/response/study_one_response_emmeans.rds"
        )


study_one_time_response_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 1 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )




#### FILTER FOR FEWER THAN 20 EVENTS

study_one_time_response_sim_coefs_filtered <- map_df(study_one_time_response_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)



study_one_actual_average_difference_filtered_response <- study_one_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_one_time_response_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_one_actual_average_difference_filtered_response - average
  )


saveRDS(study_one_time_response_sim_coefs_filtered,
        "./transfer/simulations/response/study_one_response_filtered_emmeans.rds"
)


study_one_time_response_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 1 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.14 thesis_code/simulate_thresholds/survival_time_response_sim_study_three.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_three_simulation_data" %in% ls()){
#   tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")

# study_three_time_response_sim <- map(tr_study_three_simulation_data, ~{
#   .x %>%
#     group_by(USUBJID, RSEVALID) %>%
#     select(USUBJID, RSDY, RSEVALID, overall_response) %>%
#     dplyr::filter(
#       (overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
#         (all(!overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(USUBJID, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "READER 1", "READER 2"))
#     ) %>%
#     list() %>%
#     survival_time_response(plot = F)
#   }
# )
#
#
#
#
# saveRDS(study_three_time_response_sim, "simulate_thresholds/study_three_time_response_sim.rds")

if (!"study_three_time_response_sim.rds" %in% ls()) {
  study_three_time_response_sim <- readRDS("simulate_thresholds/study_three_time_response_sim.rds")
}



# some of the results are wild at the high partial response threshold values
# and we should consider setting a minimum number of events to have a valid
# analysis of the HRs
study_three_time_response_sim[[1200]][[1]]$cox_model$nevent


study_three_time_response_sim_coefs <- map_df(study_three_time_response_sim, ~{
  tryCatch({
    coef(.x[[1]][["cox_model"]])
    },
    error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_three_actual_average_difference_response <- study_three_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_three_time_response_sim_coefs %<>%
  mutate(
    difference_from_actual = study_three_actual_average_difference_response - average
  )

saveRDS(study_three_time_response_sim_coefs,
        "./transfer/simulations/response/study_three_response_emmeans.rds"
)


study_three_time_response_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 3 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )







#### FILTER FOR FEWER THAN 20 EVENTS

study_three_time_response_sim_coefs_filtered <- map_df(study_three_time_response_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDREADER 1` = NA, `RSEVALIDREADER 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)



study_three_actual_average_difference_filtered_response <- study_three_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_three_time_response_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_three_actual_average_difference_filtered_response - average
  )


saveRDS(study_three_time_response_sim_coefs_filtered,
        "./transfer/simulations/response/study_three_response_filtered_emmeans.rds"
)


study_three_time_response_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 3 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.15 thesis_code/simulate_thresholds/survival_time_response_sim_study_two.R

 all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)

# if (!"tr_study_two_simulation_data" %in% ls()){
#   tr_study_two_simulation_data <- readRDS("tr_study_two_simulation_data.rds")
# }

library(survival)
library(survminer)
source("survival_analyses/survival_time_response_function.R")

# study_two_time_response_sim <- map(tr_study_two_simulation_data, ~{
#   .x %>%
#     group_by(UNI_ID.x, RSEVALID) %>%
#     select(UNI_ID.x, RSDY, RSEVALID, overall_response_calculated ) %>%
#     dplyr::filter(
#       (overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) |
#         (all(!overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)")) & RSDY == max(RSDY))
#     ) %>%
#     dplyr::filter(RSDY == min(RSDY)) %>%
#     arrange(UNI_ID.x, RSEVALID, RSDY) %>%
#     mutate(
#       general_status = if_else(overall_response_calculated %in% c("COMPLETE RESPONSE (CR)", "PARTIAL RESPONSE (PR)"), 1, 0),
#       RSEVALID = factor(RSEVALID, levels = c("INVESTIGATOR", "RADIOLOGIST 1", "RADIOLOGIST 2"))
#     ) %>%
#     list()
#   }
# )
#
#
# study_two_time_response_sim %<>%
#   map(~survival_time_response(.x, plot = F))
#
#
#
# saveRDS(study_two_time_response_sim, "simulate_thresholds/study_two_time_response_sim.rds")

if (!"study_two_time_response_sim.rds" %in% ls()) {
  study_two_time_response_sim <- readRDS("simulate_thresholds/study_two_time_response_sim.rds")
}





study_two_time_response_sim_coefs <- map_df(study_two_time_response_sim, ~{
  tryCatch({
    coef(.x[[1]][["cox_model"]])
    },
    error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
    )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_two_actual_average_difference_response <- study_two_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_two_time_response_sim_coefs %<>%
  mutate(
    difference_from_actual = study_two_actual_average_difference_response - average
  )


saveRDS(study_two_time_response_sim_coefs,
        "./transfer/simulations/response/study_two_response_emmeans.rds"
)


study_two_time_response_sim_coefs %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 2 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )




#### FILTER FOR FEWER THAN 20 EVENTS

study_two_time_response_sim_coefs_filtered <- map_df(study_two_time_response_sim, ~{
  tryCatch({
    cox_model <- .x[[1]][["cox_model"]]
    if (cox_model$nevent < 20) {
      return(tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA))
    } else {
      return(coef(cox_model))
    }
  },
  error = function(e) tibble(`RSEVALIDRADIOLOGIST 1` = NA, `RSEVALIDRADIOLOGIST 2` = NA)
  )
}) %>%
  mutate(average = rowMeans(.)) %>%
  bind_cols(all_combos)


study_two_actual_average_difference_filtered_response <- study_two_time_response_sim_coefs %>%
  dplyr::filter(
    progression_threshold == 0.20,
    partial_response_threshold == 0.30
  ) %>%
  pull(average)

study_two_time_response_sim_coefs_filtered %<>%
  mutate(
    difference_from_actual = study_two_actual_average_difference_filtered_response - average
  )



saveRDS(study_two_time_response_sim_coefs_filtered,
        "./transfer/simulations/response/study_two_response_filtered_emmeans.rds"
)


study_two_time_response_sim_coefs_filtered %>%
  ggplot(aes(progression_threshold, partial_response_threshold, fill = average)) +
  geom_tile() +
  # scale_fill_distiller(palette = "RdBu") +
  geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
  geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
  scale_fill_gradient2(
    low = "blue",  # Color for negative values
    mid = "white", # Color at 0
    high = "red"
  ) +
  theme_bw() +
  labs(
    title = "Study 2 -- Time to Response: Differences in Hazard Ratios",
    subtitle = "Site Investigator vs. Averaged Central Reviewer",
    x = "Disease Progression Threshold",
    y = "Partial Response Threshold",
    fill = "Diff from\nActual"
  )

B.6.0.16 thesis_code/simulate_thresholds/tr_data_study_one.R

 library(tidyr)

if (!"rs_files_with_multiple_raters" %in% ls()){
  rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")
}

if (!"tr_where_rs_files" %in% ls()){
  tr_where_rs_files <- readRDS("tr_where_rs_files.rds")
}

get_mode <- function(x) {
  # If the vector has only one unique value, return it directly
  if (length(unique(na.omit(x))) == 1) {
    return(unique(na.omit(x)))
  }

  # Otherwise, calculate the mode
  uniq_vals <- unique(x)
  uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
}

cummin_na <- function(x) {
  replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}

all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)








# I need/want to find the New Lesion and Non-Target Lesion info for
# the Oncologist. Only Overall Response (sometimes listed Clinical Response)
# is available in the RS domain data

rs_files_with_multiple_raters[[1]] %>%
  count(RSCAT, RSEVAL, RSEVALID, RSTEST, RSSTAT) %>%
  View()


rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSEVALID == "" | (RSEVALID == "ONCOLOGIST" & RSSTAT == "NOT DONE")
    , RSCAT != "CLINICAL ASSESSMENT"
  ) %>%
  View()


rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSEVALID != ""
  ) %>%
  View()


list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted")
haven::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted/ts.xpt") %>%
  View()


# None of the supp tables have useful info
# study_one_supp_tables <- list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted", full.names = T) %>%
#   grep("supp", . , value = T) %>%
#   map(haven::read_xpt)
#
# View(study_one_supp_tables[[17]])



study_one_nonsupp_tables <- list.files("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer/Study_20250320125315074/EMR 100070-004_10_Unconverted", full.names = T) %>%
  grep(".xpt", . , value = T) %>%
  grep("supp", . , value = T, invert = T) %>%
  map(haven::read_xpt)

# This is just the TR Domain file
study_one_nonsupp_tables[[24]] %>% View()


# Will probably need to take the TR file, filter for investigator NEW and NT
# data and then craft a data set with that which can then be joined to the
# data for the Readers
tr_where_rs_files[[1]] %>%
  count(TREVAL, TREVALID, TRLNKGRP, TRORRES, ) %>%
  dplyr::filter(TREVAL == "INVESTIGATOR") %>%
  View()

















######### GENERAL TR DATA


study_one_sld_data <- tr_where_rs_files[[1]] %>%
  dplyr::filter(
    TRTESTCD %in% c("SUMDIAM", "SAXIS")
  ) %>%
  mutate(
    TRGRPID_trunc = str_extract(TRGRPID, "A[[:digit:]]{1,3}"),
    TREVALID = if_else(TREVALID == "", TREVAL, TREVALID)
  ) %>%
  group_by(USUBJID, TRGRPID_trunc) %>%
  mutate(TRDY_sub = get_mode(TRDY)) %>%
  ungroup() %>%
  mutate(
    TRSTRESC = case_when(
      TRORRES %in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
      TRUE ~ TRSTRESC
    )
  ) %>%
  select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
  arrange(USUBJID, TRDY_sub, TREVALID) %>%
  tidyr::pivot_wider(
    names_from = TRTESTCD,
    values_from = TRSTRESC,
    values_fn = ~sum(as.numeric(.x))
  ) %>%
  # select(USUBJID, TRTEST, TRORRES, SUMDIAM, TREVALID, TRDY_sub) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
  ) %>%
  dplyr::filter(!is.na(SUMDIAM)) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
    SAXIS_num = as.numeric(SAXIS),
    SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
  ) %>%
  group_by(USUBJID, TREVALID) %>%
  mutate(
    baseline_value = first(SUMDIAM_num),
    mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
    mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
    nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
    nadir_cummin_value = cummin_na(SUMDIAM_num)
  ) %>%
  mutate(
    percent_change_from_nadir = if_else(
      !is.na(mm_change_from_previous),
      (SUMDIAM_num - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  ) %>%
  mutate(
    baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
    progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
    partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
    complete_response = SUMDIAM_num == 0
  ) %>%
  mutate(
    target_response_calculated = case_when(
      # SUMNLNLD
      SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
      SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
      (SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
      (SAXIS_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
      # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
      (((SUMDIAM_num-baseline_value)/baseline_value) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
      ((percent_change_from_nadir >= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
      is.na(mm_change_from_previous) ~ NA_character_,
      TRUE ~ "STABLE DISEASE (SD)"
    )
  ) %>%
  ungroup()







######### INVESTIGATOR/ONCOLOGIST

# NOT ASSESSABLE, ABSENT, PRESENT, UNEQUIVOCAL PROGRESSION

investigator_nontarget_new_tumors <- tr_where_rs_files[[1]] %>%
  mutate(TREVALID = if_else(TREVALID == "", TREVAL, TREVALID)) %>%
  dplyr::filter(
    TREVAL == "INVESTIGATOR",
    stringr::str_detect(TRLNKGRP, "^T-.*", negate = T),
    TRDY > 0
  ) %>%
  select(USUBJID, TREVALID, TRLNKID, TRGRPID, TRORRES, TRDY) %>%
  mutate(
    tumor_label = case_when(
      stringr::str_detect(TRLNKID, "^NT.*") ~ "Non-target Response",
      stringr::str_detect(TRLNKID, "^NEW.*") ~ "New Lesion Present"
    ),
    TRORRES = if_else(TRORRES == "BASELINE", "PRESENT", TRORRES)
  ) %>%
  group_by(USUBJID, TRDY, tumor_label) %>%
  mutate(
    worst_response = case_when(
      all(TRORRES == "ABSENT") ~ "ABSENT",
      all(TRORRES == "NOT ASSESSABLE") ~ "NOT ASSESSABLE", # consider changing this to ALL instead of ANY--affects 3 records
      any(TRORRES == "UNEQUIVOCAL PROGRESSION") ~ "UNEQUIVOCAL PROGRESSION",
      all(TRORRES == "PRESENT") ~ "PRESENT",
      all(TRORRES %in% c("PRESENT", "ABSENT")) ~ "PRESENT"
    )
  ) %>%
  ungroup() %>%
  distinct(USUBJID, TRDY, tumor_label, .keep_all = TRUE) %>%
  select(USUBJID, TREVALID, RSTEST = tumor_label, RSORRES = worst_response, TRDY)


investigator_overall_response <- rs_files_with_multiple_raters[[1]]  %>%
  dplyr::filter(
    RSEVALID == "ONCOLOGIST",
    RSSTAT != "NOT DONE",
    RSORRES != "DEATH",
    RSTESTCD != "CLINRESP",
    RSDY > 0
  ) %>%
  mutate(RSEVALID = "INVESTIGATOR") %>%
  select(USUBJID, RSTEST, RSORRES, TREVALID = RSEVALID, TRDY= RSDY)



investigator_overall_nontarget_new_response <- investigator_nontarget_new_tumors %>%
  bind_rows(investigator_overall_response) %>%
  arrange(USUBJID, TRDY) %>%
  pivot_wider(
    values_from = RSORRES,
    names_from = RSTEST
  )


study_one_investigator_recist_data <- investigator_overall_nontarget_new_response %>%
    left_join(
      study_one_sld_data,
      by = c("USUBJID" = "USUBJID", "TREVALID" = "TREVALID", "TRDY" = "TRDY_sub")
    ) %>%
  select(USUBJID:`New Lesion Present`, `Target Response` = target_response_calculated) %>%
  mutate(
    `New Lesion Present` = case_when(
      `New Lesion Present` == "UNEQUIVOCAL PROGRESSION" ~ "Y",
      is.na(`New Lesion Present`) ~ "N"
    ),
    `Non-target Response` = case_when(
      `Non-target Response` == "ABSENT" ~ "COMPLETE RESPONSE (CR)",
      `Non-target Response` == "UNEQUIVOCAL PROGRESSION" ~ "PROGRESSIVE DISEASE (PD)",
      `Non-target Response` == "PRESENT" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
      `Non-target Response` == "NOT ASSESSABLE" ~ "NOT ASSESSABLE"

    )
  )  %>%
  rename(RSEVALID = TREVALID, RSDY = TRDY)






############# RATERS 1 AND 2

study_one_raters_recist_data <- rs_files_with_multiple_raters[[1]] %>%
  dplyr::filter(
    RSEVALID %in% c("READER 1", "READER 2"),
    RSDY > 0
  ) %>%
  select(USUBJID, RSTEST, RSORRES, RSEVALID, RSDY) %>%
  pivot_wider(
    values_from = RSORRES,
    names_from = RSTEST
  )


# validate calculation of Target outcome in SLD data


# View(study_one_raters_recist_data %>%
#   left_join(
#     study_one_sld_data,
#     by = c("USUBJID" = "USUBJID", "RSEVALID" = "TREVALID", "RSDY" = "TRDY_sub")
#   ) %>%
#   # select(-c(TRTEST:complete_response)) %>%
#   mutate(
#     target_check = `Target Response` == target_response_calculated
#   ) %>%
#     dplyr::filter(!target_check)
#   )

# Only 8 discrepancies, and only on Complete vs. Partial response









### FINALLY COMBINE ALLLLL DATA

study_one_simulation_prepped_data <- study_one_investigator_recist_data %>%
  bind_rows(study_one_raters_recist_data) %>%
  arrange(USUBJID, RSDY, RSEVALID) %>%
  left_join(
    study_one_sld_data,
    by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
  )









all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)



future::plan(future::multicore, workers = 6)


tr_study_one_simulation_data <- furrr::future_pmap(all_combos, ~{
  study_one_simulation_prepped_data %>%
    mutate(
      baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
      progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
      partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
      complete_response = SUMDIAM_num == 0
    ) %>%
    mutate(
      target_response_calculated = case_when(
        # SUMNLNLD
        SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
        SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
        ( round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
        ((percent_change_from_nadir >= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
        is.na(mm_change_from_previous) ~ NA_character_,
        TRUE ~ "STABLE DISEASE (SD)"
      )
    ) %>%
    ungroup() %>%
    mutate(
      target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
      target_check = `Target Response` == target_response_calculated
    ) %>%
    mutate(
      overall_response_calculated = case_when(
        `New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
        `Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        target_response_calculated == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
        `New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
      )
    ) %>%
    select(matches("USUBJID"), RSEVALID, RSDY, `Overall Response`, `Target Response`, `Non-target Response`,
           `New Lesion Present`, SUMDIAM_num:baseline_response,
           target_response_calculated, overall_response_calculated) %>%
    dplyr::filter(!is.na(RSEVALID))
})





saveRDS(tr_study_one_simulation_data, "tr_study_one_simulation_data.rds")

B.6.0.17 thesis_code/simulate_thresholds/tr_data_study_three.R

 
library(tidyr)

if (!"rs_files_with_multiple_raters" %in% ls()){
  rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")
}

if (!"tr_where_rs_files" %in% ls()){
  tr_where_rs_files <- readRDS("tr_where_rs_files.rds")
}

get_mode <- function(x) {
  # If the vector has only one unique value, return it directly
  if (length(unique(na.omit(x))) == 1) {
    return(unique(na.omit(x)))
  }

  # Otherwise, calculate the mode
  uniq_vals <- unique(x)
  uniq_vals[which.max(tabulate(match(x, uniq_vals)))]
}

cummin_na <- function(x) {
  replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}

all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)



rs_study_three_ratings <- rs_files_with_multiple_raters[[3]] %>%
  mutate(
    RSEVALID = if_else(is.na(RSEVALID) | RSEVALID == "", RSEVAL, RSEVALID)
  ) %>%
  arrange(USUBJID, RSDY, RSEVALID) %>%
  dplyr::filter(
    RSDY > 0,
    RSCAT == "RECIST 1.1",
    RSTESTCD != "SCANUPL",
    (USUBJID != "20064700370000021" & RSDY != 165)
  ) %>%
  select(USUBJID, RSDY, RSEVALID, RSTEST, RSORRES) %>%
  mutate(
    RSORRES = case_when(
      RSTEST == "New Lesion Progression" & RSORRES == "UNEQUIVOCAL" ~ "Y",
      RSTEST == "New Lesion Progression" & RSORRES == "EQUIVOCAL" ~ "N",
      TRUE ~ RSORRES
    ),
    RSTEST = if_else(RSTEST == "New Lesion Progression", "New Lesion Present", RSTEST),
    RSORRES = case_when(
      RSORRES == "CR" ~ "COMPLETE RESPONSE (CR)",
      RSORRES == "PR" ~ "PARTIAL RESPONSE (PR)",
      RSORRES == "SD" ~ "STABLE DISEASE (SD)",
      RSORRES == "PD" ~ "PROGRESSIVE DISEASE (PD)",
      RSORRES == "NE" ~ "NOT EVALUABLE",
      RSORRES == "NON-CR/NON-PD" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
      TRUE ~ RSORRES
    )
  ) %>%
  pivot_wider(
    values_from = RSORRES,
    names_from = RSTEST
  ) %>%
  mutate(
    `New Lesion Present` = if_else(is.na(`New Lesion Present`), "N", `New Lesion Present`)
  ) %>%
  dplyr::filter(
    !is.na(`Overall Response`),
    `Overall Response` != "BASELINE"
  )
  # count(`Overall Response`, `Target Response`, `Non-target Response`, `New Lesion Present`) %>%
  # View()






# All data for re-calculating Target Response for all raters appears to be
# available here, but the SUMLIDAM values do not directly have a date associated
# to them so primary key attached to a date needs to be found.
# It looks like the TRREFID variable can be used for this.

tr_study_data_three_target_tumors <- tr_where_rs_files[[3]] %>%
  mutate(
    TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
  ) %>%
  dplyr::filter(
    TRCAT == "RECIST 1.1",
    !str_detect(TRLNKGRP, "^NT.*|^NEW.*")
  ) %>%
  group_by(USUBJID, TRREFID) %>%
  mutate(
    TRDY_sub = if_else(TREVALID %in% c("READER 1", "READER 2"), get_mode(TRDY), TRDY),
    TRTESTCD = if_else(TRTESTCD == "", NA_character_, TRTESTCD)
  ) %>%
  ungroup() %>%
  dplyr::filter(
    !TRTESTCD %in% c("TUMSTATE", "LDIAM", "ACNSD", "PCNSD"),
    !is.na(TRDY_sub)
    ) %>%
  ungroup() %>%
  mutate(
    TRSTRESC = case_when(
      TRORRES %in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
      TRUE ~ TRSTRESC
    )
  ) %>%
  select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
  arrange(USUBJID, TRDY_sub, TREVALID) %>%
  tidyr::pivot_wider(
    names_from = TRTESTCD,
    values_from = TRSTRESC,
    values_fn = ~sum(as.numeric(.x))
  ) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
    SAXIS_num = as.numeric(SAXIS),
    SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
  ) %>%
  dplyr::filter(!is.na(SUMDIAM)) %>%
  group_by(USUBJID, TREVALID) %>%
  mutate(
    baseline_value = first(SUMDIAM_num),
    mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
    mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
    nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
    nadir_cummin_value = cummin_na(SUMDIAM_num)
  ) %>%
  mutate(
    percent_change_from_nadir = if_else(
      !is.na(mm_change_from_previous),
      (SUMDIAM_num - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  ) %>%
  mutate(
    baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
    progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
    partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
    complete_response = SUMDIAM_num == 0
  ) %>%
  mutate(
    target_response_calculated = case_when(
      # SUMNLNLD
      SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
      SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
      (SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
      (SAXIS_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
      # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
      (((SUMDIAM_num-baseline_value)/baseline_value) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
      ((percent_change_from_nadir >= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
      is.na(mm_change_from_previous) ~ NA_character_,
      TRUE ~ "STABLE DISEASE (SD)"
    )
  ) %>%
  ungroup()
  # View()




check_ids <- rs_study_three_ratings %>%
  dplyr::filter(
    !is.na(`Target Response`)
  ) %>%
  left_join(
    tr_study_data_three_target_tumors,
    by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
  ) %>%
  mutate(
    target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
    target_check = `Target Response` == target_response_calculated
  ) %>%
  mutate(
    overall_response = case_when(
      `New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
      `Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
      `Target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
      `New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
      `New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
      `New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
      `New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "NOT APPLICABLE" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
      `New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
      `New Lesion Present` %in% c("N", "NOT EVALUABLE") & `Target Response` == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
      `New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "NOT EVALUABLE" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
      `New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
    )
  ) %>%
  mutate(
    overall_check = `Overall Response` == overall_response
  )






# check_ids %>%
#   select(USUBJID, RSDY, target_response_calculated, RSEVALID) %>%
#   tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#   tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
#   select(-1) %>%
#   irr::kappam.fleiss(., detail = T)






study_three_tr_data <- tr_where_rs_files[[3]] %>%
  mutate(
    TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
  ) %>%
  dplyr::filter(
    TRCAT == "RECIST 1.1",
    !str_detect(TRLNKGRP, "^NT.*|^NEW.*")
  ) %>%
  group_by(USUBJID, TRREFID) %>%
  mutate(
    TRDY_sub = if_else(TREVALID %in% c("READER 1", "READER 2"), get_mode(TRDY), TRDY),
    TRTESTCD = if_else(TRTESTCD == "", NA_character_, TRTESTCD)
  ) %>%
  ungroup() %>%
  dplyr::filter(
    !TRTESTCD %in% c("TUMSTATE", "LDIAM", "ACNSD", "PCNSD"),
    !is.na(TRDY_sub)
  ) %>%
  ungroup() %>%
  mutate(
    TRSTRESC = case_when(
      TRORRES %in% c("TOO SMALL TO MEASURE (5 MM TO BE USED FOR CALCULATION)", "TOO SMALL TO MEASURE") ~ "5",
      TRUE ~ TRSTRESC
    )
  ) %>%
  select(USUBJID, TRDY_sub, TREVALID, TRTESTCD, TRSTRESC) %>%
  arrange(USUBJID, TRDY_sub, TREVALID) %>%
  tidyr::pivot_wider(
    names_from = TRTESTCD,
    values_from = TRSTRESC,
    values_fn = ~sum(as.numeric(.x))
  ) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num),
    SAXIS_num = as.numeric(SAXIS),
    SAXIS_num = if_else(SAXIS %in% c("TOO SMALL TO MEASURE"), 0, SAXIS_num)
  ) %>%
  dplyr::filter(!is.na(SUMDIAM)) %>%
  group_by(USUBJID, TREVALID) %>%
  mutate(
    baseline_value = first(SUMDIAM_num),
    mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
    mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
    nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
    nadir_cummin_value = cummin_na(SUMDIAM_num)
  ) %>%
  mutate(
    percent_change_from_nadir = if_else(
      !is.na(mm_change_from_previous),
      (SUMDIAM_num - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  ) %>%
  ungroup()



study_three_simulation_prepped_data <- rs_study_three_ratings %>%
  left_join(
    study_three_tr_data,
    by = c("USUBJID" = "USUBJID", "RSDY" = "TRDY_sub", "RSEVALID" = "TREVALID")
  ) %>%
  group_by(USUBJID, RSEVALID)



future::plan(future::multicore, workers = 6)


tr_study_three_simulation_data <-  furrr::future_pmap(all_combos, ~{
  study_three_simulation_prepped_data %>%
    mutate(
      baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
      progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
      partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
      complete_response = SUMDIAM_num == 0
    ) %>%
    mutate(
      target_response_calculated = case_when(
        # SUMNLNLD
        SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
        SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
        (SUMDIAM_num <= 10 & SAXIS == "TOO SMALL TO MEASURE") ~ "COMPLETE RESPONSE (CR)",
        (SAXIS_num <= 10 & near(SUMDIAM_num, SAXIS_num)) ~ "COMPLETE RESPONSE (CR)",
        # (SAXIS_num <= 10 & (SUMDIAM_num - SAXIS_num <= 10)) ~ "COMPLETE RESPONSE (CR)",
        (((SUMDIAM_num-baseline_value)/baseline_value) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
        ((percent_change_from_nadir >= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
        is.na(mm_change_from_previous) ~ NA_character_,
        TRUE ~ "STABLE DISEASE (SD)"
      )
    ) %>%
    ungroup() %>%
    mutate(
      overall_response = case_when(
        `New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
        `Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        target_response_calculated == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
        `New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
      )
    )
})




saveRDS(tr_study_three_simulation_data, "tr_study_three_simulation_data.rds")

# if (!"tr_study_three_simulation_data" %in% ls()){
#   tr_study_three_simulation_data <- readRDS("tr_study_three_simulation_data.rds")
# }
#
#
#
# ################################ TARGET OUTCOME SIMULATION
#
#
# irr_simulation_target_outcome <- tr_study_three_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, target_response_calculated) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = target_response_calculated) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
# study_three_original_irr_id <- all_combos %>%
#   mutate(
#     id = row_number()
#   ) %>%
#   dplyr::filter(
#     progression_threshold == 0.2,
#     partial_response_threshold == 0.3
#   ) %>%
#   pull(id)
#
#
#
# study_three_original_irr_value_target <- irr_simulation_target_outcome[[study_three_original_irr_id]]$value
#
#
# all_combos %>%
#   mutate(
#     fleiss_kappa_simulation = map_dbl(irr_simulation_target_outcome, ~.x[["value"]]),
#     fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_target,
#     fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_target
#   ) %>%
#   ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
#   geom_tile() +
#   # scale_fill_distiller(palette = "RdBu") +
#   geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
#   geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
#   scale_fill_gradient2(
#     low = "blue",  # Color for negative values
#     mid = "white", # Color at 0
#     high = "red",  # Color for positive values
#     midpoint = 0   # Ensures color transition at zero
#   ) +
#   theme_bw() +
#   labs(
#     title = "Effect of Varying Progression and Response Thresholds on IRR",
#     subtitle = "RECIST 1.1 *Target* Response in Study 3"
#   )
#
#
#
#
#
#
#
#
#
#
#
# ###################################### OVERALL OUTCOME SIMULATION
#
#
#
# irr_simulation_overall_outcome_study_three <- tr_study_three_simulation_data %>%
#   map(~{
#     .x %>%
#       select(USUBJID, RSDY, RSEVALID, overall_response) %>%
#       tidyr::unite("Unique_Observation", c("USUBJID", "RSDY"), remove = T) %>%
#       tidyr::pivot_wider(names_from = RSEVALID, values_from = overall_response) %>%
#       select(-1) %>%
#       irr::kappam.fleiss(., detail = T)
#   })
#
#
#
#
#
# study_three_original_irr_value_overall <- irr_simulation_overall_outcome_study_three[[study_three_original_irr_id]]$value
#
#
# all_combos %>%
#   mutate(
#     fleiss_kappa_simulation = map_dbl(irr_simulation_overall_outcome_study_three, ~.x[["value"]]),
#     fleiss_kappa_deviation = fleiss_kappa_simulation - study_three_original_irr_value_overall,
#     fleiss_kappa_deviation_perc = fleiss_kappa_deviation / study_three_original_irr_value_overall
#   ) %>%
#   ggplot(aes(progression_threshold, partial_response_threshold, fill = fleiss_kappa_deviation)) +
#   geom_tile() +
#   # scale_fill_distiller(palette = "RdBu") +
#   geom_segment(aes(x = 0.2, xend = 0.2, y = 0.0, yend = 0.3)) +
#   geom_segment(aes(x = 0.0, xend = 0.2, y = 0.3, yend = 0.3)) +
#   scale_fill_gradient2(
#     low = "blue",  # Color for negative values
#     mid = "white", # Color at 0
#     high = "red",  # Color for positive values
#     midpoint = 0   # Ensures color transition at zero
#   ) +
#   theme_bw() +
#   labs(
#     title = "Effect of Varying Progression and Response Thresholds on IRR",
#     subtitle = "RECIST 1.1 *Overall* Response in Study 3")
#
#

B.6.0.18 thesis_code/simulate_thresholds/tr_data_study_two.R

 
library(tidyr)

if (!"rs_files_with_multiple_raters" %in% ls()){
  rs_files_with_multiple_raters <- readRDS("rs_files_with_multiple_raters.rds")
}

if (!"tr_where_rs_files" %in% ls()){
  tr_where_rs_files <- readRDS("tr_where_rs_files.rds")
}

cummin_na <- function(x) {
  replace_na(x, Inf) %>% cummin() %>% replace(. == Inf, NA)
}

rs_study_two_ratings <- rs_files_with_multiple_raters[[2]] %>%
  mutate(
    RSEVALID = if_else(is.na(RSEVALID) | RSEVALID == "", RSEVAL, RSEVALID)
  ) %>%
  arrange(UNI_ID, RSDY, RSEVALID) %>%
  dplyr::filter(
    RSDY > 0,
    RSCAT == "RECIST 1.1",
    RSGRPID != "GLOBAL REVIEW",
    RSTESTCD != "RSALL"
  ) %>%
  select(UNI_ID, RSDY, RSEVALID, RSTEST, RSORRES) %>%
  mutate(
    RSORRES = case_when(
      RSTEST == "New Lesion Progression" & RSORRES == "UNEQUIVOCAL" ~ "Y",
      RSTEST == "New Lesion Progression" & RSORRES == "EQUIVOCAL" ~ "N",
      TRUE ~ RSORRES
    ),
    RSTEST = if_else(RSTEST == "New Lesion Progression", "New Lesion Present", RSTEST),
    RSORRES = case_when(
      RSORRES == "CR" ~ "COMPLETE RESPONSE (CR)",
      RSORRES == "PR" ~ "PARTIAL RESPONSE (PR)",
      RSORRES == "SD" ~ "STABLE DISEASE (SD)",
      RSORRES == "PD" ~ "PROGRESSIVE DISEASE (PD)",
      RSORRES == "NE" ~ "NOT EVALUABLE",
      RSORRES == "NON-CR/NON-PD" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
      RSORRES == "NA" ~ "NOT ASSESSABLE",
      TRUE ~ RSORRES
    )
  ) %>%
  pivot_wider(
    values_from = RSORRES,
    names_from = RSTEST
  ) %>%
  mutate(
    `New Lesion Present` = if_else(is.na(`New Lesion Present`), "N", `New Lesion Present`)
  )





# Sum of Longest Diameter info for Investigators does not seem to be available
# If correct, I cannot calculate new Target Outcome responses for Investigator
# ! The necessary info for the investigator appears to be available in the
# ADRS ADaM dataset:
# haven::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer//Study_20250320125552850/YO40245_4_Unconverted/adrs.xpt")

study_two_adrs_data <- haven::read_xpt("/data/care/fileshare/Clinrep/xtranscelerate/data/Cancer//Study_20250320125552850/YO40245_4_Unconverted/adrs.xpt")

# Can only get investigator from the ADRS because the other readers are just
# lumped together as "Independent Review Forum"
# study_two_adrs_data %>%
#   dplyr::filter(
#     PARCAT2 == "Investigator",
#     PARCAT3 == "Recist 1.1"
#   ) %>%
#   count(PARAM, PARAMCD, PARCAT2, PARCAT3) %>%
#   View()

study_two_sld_data_investigator <- study_two_adrs_data %>%
  dplyr::filter(
    PARAMCD %in% c("VISSLD"),
    PARCAT2 == "Investigator"
  ) %>%
  select(-c(REGIONDI:MVI_EHS2)) %>%
  arrange(UNI_ID, ADY) %>%
  select(UNI_ID, TREVALID = PARCAT2, TRDY = ADY, PARAM, AVALC) %>%
  pivot_wider(
    names_from = PARAM,
    values_from = AVALC
  ) %>%
  rename(TRSTRESC = 4) %>%
  mutate(
    TRSTRESN = as.numeric(TRSTRESC),
    TREVALID = "INVESTIGATOR"
    )



# Need to figure out why almost all UNI_ID/TRDY/TREVALID combinations have
# 2 SUMLDIAM measurements rather than just 1. They are often at least
# slightly different values, and I need to know how to choose the correct one.

# The current grouping by UNI_ID, TREVALID, TRDY, TRSPID shows that there is
# a connection between TRSPID and the RECIST vs. HCC rating. I should be able
# to create a sudo variable that I can then filter on to retain the RECIST data

# READER 1 AND READER 2 SLD INFO!!!
study_two_sld_data_radiologists <- tr_where_rs_files[[2]] %>%
  mutate(
    TREVALID = if_else(is.na(TREVALID) | TREVALID == "", TREVAL, TREVALID)
  ) %>%
  arrange(UNI_ID, TREVALID, TRDY, TRSPID) %>%
  group_by(TRSPID) %>%
  mutate(
    sld_grouping = case_when(
      any(str_detect(TRLNKID, "RECIST 1.1")) ~ "RECIST",
      any(str_detect(TRLNKID, "HCC mRECIST")) ~ "HCC",
      TRUE ~ "none"
    )
  ) %>%
  ungroup() %>%
  dplyr::filter(
    TRTESTCD == "SUMLDIAM",
    sld_grouping == "RECIST"
  ) %>%
  select(UNI_ID, TREVALID, TRDY, TRSTRESC, TRSTRESN)


#
# study_two_sld_data_radiologists %>%
#   group_by(UNI_ID, TREVALID) %>%
#   mutate(
#     visit_number = row_number()
#   ) %>%
#   ungroup() %>%
#   select(-TRSTRESC, -TRSTRESN) %>%
#   pivot_wider(
#     names_from = TREVALID,
#     values_from = visit_number
#   ) %>%
#   mutate(
#     check = `RADIOLOGIST 1` == `RADIOLOGIST 2`
#   ) %>%
#   View()


# study_two_sld_data_radiologists %>%
#   group_by(UNI_ID) %>%
#   arrange(UNI_ID, TRDY) %>%
#   mutate(fuzzy_date_group = cumsum(c(1, diff(TRDY)) > 4)) %>%
#   ungroup() %>%
#   select(-TRSTRESC, -TRSTRESN) %>%
#   View()
#   pivot_wider(
#     names_from = TREVALID,
#     values_from = fuzzy_date_group
#   ) %>%
#   mutate(
#     check = `RADIOLOGIST 1` == `RADIOLOGIST 2`
#   ) %>%
#   View()






study_two_tr_data <- study_two_sld_data_investigator %>%
  bind_rows(study_two_sld_data_radiologists) %>%
  mutate(
    TRDY_reduced = if_else(
      (UNI_ID == "f58b117eaf882d2d7524288703747835fef69913bfaf91972f465cd8ddb53386" & TRDY == -22) |
      (UNI_ID == "a4fa042453ec49e54bca11bb04bd9d5447e6ce8567c4257719c8f33273b2567b" & TRDY == -20) |
      (UNI_ID == "f29e9a84fa93f7644fd4d5578e605797f6978f03485ca8f089a265330bc49028" & TRDY == -27) |
      (UNI_ID == "b5483e217e5468468097734c8e5d6c2dd8adb43d0548a3db740fb148a13176a8" & TRDY == -25) |
      (UNI_ID == "b5483e217e5468468097734c8e5d6c2dd8adb43d0548a3db740fb148a13176a8" & TRDY == -14) |
      (UNI_ID == "f493993091c878bdf73c5a860283ee8f4c9f011e4e5dfa40fa25b4a4ae749e2b" & TRDY == -14) |
      (UNI_ID == "adf20c5d546f5e29de31b18c24ed22539bdb4e3ea5d1faf2182de155002e6e8b" & TRDY == -16),
      -5,
      TRDY
    )
  ) %>%
  group_by(UNI_ID) %>%
  arrange(UNI_ID, TRDY_reduced) %>%
  mutate(fuzzy_date_group = cumsum(c(1, diff(TRDY_reduced)) >= 9)) %>%
  ungroup() %>%
  group_by(UNI_ID) %>%
  ungroup() %>%
  select(-TRSTRESN)



study_two_joined_rs_tr_data <- rs_study_two_ratings %>%
  fuzzyjoin::fuzzy_right_join(
    study_two_tr_data,
    by = c("UNI_ID" = "UNI_ID", "RSEVALID" = "TREVALID", "RSDY" = "TRDY"),
    match_fun = list(`==`, `==`, function(x,y) abs(x - y) <= 10)
  )


# View(study_two_joined_rs_tr_data)



study_two_finalized_data <- study_two_joined_rs_tr_data %>%
  mutate(
    TRSTRESC = if_else(TRSTRESC == "NE", "NOT EVALUABLE", TRSTRESC)
  ) %>%
  rename(
    SUMDIAM = TRSTRESC
  ) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
  ) %>%
  dplyr::filter(!is.na(SUMDIAM)) %>%
  group_by(UNI_ID.y, TREVALID) %>%
  mutate(
    baseline_value = first(SUMDIAM_num),
    mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
    mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
    nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
    nadir_cummin_value = cummin_na(SUMDIAM_num)
  ) %>%
  mutate(
    percent_change_from_nadir = if_else(
      !is.na(mm_change_from_previous),
      (SUMDIAM_num - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  ) %>%
  mutate(
    baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
    progressive_disease = (percent_change_from_nadir > 0.2) & ((SUMDIAM_num - nadir_cummin_value) > 5),
    partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
    complete_response = SUMDIAM_num == 0
  ) %>%
  mutate(
    target_response_calculated = case_when(
      # SUMNLNLD
      SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
      SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
      ( round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -0.30) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
      ((percent_change_from_nadir >= 0.2) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
      is.na(mm_change_from_previous) ~ NA_character_,
      TRUE ~ "STABLE DISEASE (SD)"
    )
  ) %>%
  ungroup() %>%
  mutate(
    target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
    target_check = `Target Response` == target_response_calculated
    ) %>%
  mutate(
    overall_response = case_when(
      `New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
      `Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
      `Target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
      `New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
      `New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
      `New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
      `New Lesion Present` == "N" & `Target Response` == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "NOT APPLICABLE" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
      `New Lesion Present` == "N" & `Target Response` == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
      `New Lesion Present` %in% c("N", "NOT EVALUABLE") & `Target Response` == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
      `New Lesion Present` == "N" & `Target Response` == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
      `New Lesion Present` == "NOT EVALUABLE" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
      `New Lesion Present` == "N" & `Target Response` == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
    )
  )







study_two_simulation_prepped_data <- study_two_joined_rs_tr_data %>%
  mutate(
    TRSTRESC = if_else(TRSTRESC == "NE", "NOT EVALUABLE", TRSTRESC)
  ) %>%
  rename(
    SUMDIAM = TRSTRESC
  ) %>%
  mutate(
    SUMDIAM_num = as.numeric(SUMDIAM),
    SUMDIAM_num = if_else(SUMDIAM == "NOT EVALUABLE", NA_real_, SUMDIAM_num)
  ) %>%
  dplyr::filter(!is.na(SUMDIAM)) %>%
  group_by(UNI_ID.y, TREVALID) %>%
  mutate(
    baseline_value = first(SUMDIAM_num),
    mm_change_from_baseline = SUMDIAM_num - first(SUMDIAM_num),
    mm_change_from_previous = SUMDIAM_num - lag(SUMDIAM_num),
    nadir_absolute_value = min(SUMDIAM_num, na.rm = T),
    nadir_cummin_value = cummin_na(SUMDIAM_num)
  ) %>%
  mutate(
    percent_change_from_nadir = if_else(
      !is.na(mm_change_from_previous),
      (SUMDIAM_num - lag(nadir_cummin_value))/lag(nadir_cummin_value),
      NA_real_
    )
  )




all_combos <- expand.grid(
  progression_threshold = seq(0,1,0.01),
  partial_response_threshold = seq(0,1,0.01)
)



?plan

future::plan(future::multicore, workers = 6)


tr_study_two_simulation_data <- furrr::future_pmap(all_combos, ~{
  study_two_simulation_prepped_data %>%
    mutate(
      baseline_response = ((SUMDIAM_num-baseline_value)/baseline_value),
      progressive_disease = (percent_change_from_nadir > ..1) & ((SUMDIAM_num - nadir_cummin_value) > 5),
      partial_response = (((SUMDIAM_num-baseline_value)/baseline_value) < -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease,
      complete_response = SUMDIAM_num == 0
    ) %>%
    mutate(
      target_response_calculated = case_when(
        # SUMNLNLD
        SUMDIAM == "NOT ASSESSABLE" ~ "NOT ASSESSABLE",
        SUMDIAM_num == 0 ~ "COMPLETE RESPONSE (CR)",
        ( round(((SUMDIAM_num-baseline_value)/baseline_value), 3) <= -..2) & (((SUMDIAM_num-baseline_value)/baseline_value) > -1) & (SUMDIAM_num != 0) & !progressive_disease ~ "PARTIAL RESPONSE (PR)",
        ((percent_change_from_nadir >= ..1) & ((SUMDIAM_num - nadir_cummin_value) >= 5)) ~ "PROGRESSIVE DISEASE (PD)",
        is.na(mm_change_from_previous) ~ NA_character_,
        TRUE ~ "STABLE DISEASE (SD)"
      )
    ) %>%
    ungroup() %>%
    mutate(
      target_response_calculated = if_else(`Target Response` == "NOT EVALUABLE", "NOT EVALUABLE", target_response_calculated),
      target_check = `Target Response` == target_response_calculated
    ) %>%
    mutate(
      overall_response_calculated = case_when(
        `New Lesion Present` == "Y" ~ "PROGRESSIVE DISEASE (PD)",
        `Non-target Response` == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        target_response_calculated == "PROGRESSIVE DISEASE (PD)" ~ "PROGRESSIVE DISEASE (PD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("COMPLETE RESPONSE (CR)", "NOT APPLICABLE") ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "COMPLETE RESPONSE (CR)",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` == "COMPLETE RESPONSE (CR)" ~ "STABLE DISEASE (SD)",
        `New Lesion Present` == "N" & target_response_calculated == "COMPLETE RESPONSE (CR)" & `Non-target Response` %in% c("NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)", "NOT ASSESSABLE") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT APPLICABLE" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NOT APPLICABLE" ~ "",
        `New Lesion Present` == "N" & target_response_calculated == "NOT APPLICABLE" & `Non-target Response` == "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)" ~ "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)",
        `New Lesion Present` %in% c("N", "NOT EVALUABLE") & target_response_calculated == "NOT ASSESSABLE" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "PARTIAL RESPONSE (PR)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "PARTIAL RESPONSE (PR)",
        `New Lesion Present` == "NOT EVALUABLE" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "NOT EVALUABLE",
        `New Lesion Present` == "N" & target_response_calculated == "STABLE DISEASE (SD)" & `Non-target Response` %in% c("NOT APPLICABLE", "NOT ASSESSABLE", "NON-COMPLETE RESPONSE/NON-PROGRESSIVE DISEASE (NON-CR/NON-PD)") ~ "STABLE DISEASE (SD)"
      )
    ) %>%
    select(matches("UNI_ID"), RSEVALID, RSDY, `Overall Response`, `Target Response`, `Non-target Response`,
           `New Lesion Present`, SUMDIAM_num:baseline_response,
           target_response_calculated, overall_response_calculated) %>%
    dplyr::filter(!is.na(RSEVALID))
})





saveRDS(tr_study_two_simulation_data, "tr_study_two_simulation_data.rds")