## ################################################################################### ## Author: Oleg Sofrygin ## Maintainer: Oleg Sofrygin (Oleg.Sofrygin@kp.org) ## Copyright 2016 Oleg Sofrygin ## This R program is free software: you can redistribute it and/or modify it under the terms of ## the GNU General Public License as published by the Free Software Foundation, either version 3 ## of the License, or (at your option) any later version. ## This R program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; ## without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. ## See the GNU General Public License for more details. ## See for a copy of the GNU General Public License. ## ################################################################################### # DO NOT EDIT THIS LINE: ## @knitr long_to_wide_macro ################################################################################### ### convert_long_to_wide.R # This macro does several things: # 1) Takes data in long format (dat) and returns a list of two dataframes: dat_wide, regim_wide # *) dat_wide: wide format dataframe, structured as: (ID_var, W, L(t), C(t), A(t), newY(t)) # *) regim_wide: wide format regimens dataframe, structured as: (ID_var, regimens) # 2) Removes all observations with follow-up over tint_max-1 # *) only keeps observations which satisfy: dat[,t_var] < tint_max # 3) dat_wide gets new outcome, which is defined by shifting oldY outcome backward in time by 1 # *) new outcome gets named according to newY argument # *) newY(t)=oldY(t+1) for all t=0,...,t_max # 4) Once the outcome is observed, newY(t)=1, it is carried forward until the end of FUP # NOT IMPLEMENTED YET: # 5) Once censoring is observed, C(t)=1, all variables to the right are set to NA (except for outcome) # 6) Converts all censoring variables (C) to factors with levels "censored" / "uncensored" # *) input data should be coded as C=1 for censoring events ################################################################################### ### arguments: # # dat - original data frame in long format (repeated measures per observation) # ID_var - unique patient identifier # t_var - follow-up time variable # tint_max - max number of time intervals to be kept in output data # W - vector of baseline covariate names # L - vector of time-varying covariate names # C - vector of censoring names # A - treatment name # oldY - name of the outcome variable in input data # newY - name for the redefined outcome in new data (can be the same as oldY) # regimens - vector of variable names defining regimens in input data ################################################################################### f_Long_to_Wide <- function(dat, ID_var, t_var, tint_max, W, L, C, A, oldY, newY, regimens) { #-------------------------------------------------------------------------------- # TIME ORDERING STRUCTURE: #-------------------------------------------------------------------------------- finTD_vars <- c(L, C, A, newY) # TIME ORDERING OF TIME-DEP VARS keep_vars <- c(ID_var,t_var,W,finTD_vars) # LIST OF VARS THAT WILL MAKE IT INTO FINAL DATASET #-------------------------------------------------------------------------------- # shift Y with Y_tm1(t) = Y(t+1) #-------------------------------------------------------------------------------- Y_tm1 <- by(dat[,c(ID_var,t_var,Y)], dat[,ID_var], function(sub_dat) { sub_dat[nrow(sub_dat)-1,Y] <- sub_dat[nrow(sub_dat),Y] return(t(sub_dat[-nrow(sub_dat),])) }) Y_tm1_mtx <- matrix(unlist(Y_tm1), ncol = 3, byrow = TRUE) Y_tm1_df <- data.frame(Y_tm1_mtx[,1], as.integer(Y_tm1_mtx[,2]), as.integer(Y_tm1_mtx[,3])) names(Y_tm1_df) <- c(ID_var, t_var, newY) #-------------------------------------------------------------------------------- # remove extra covars, remove last row for all obs and keep new def of Y: #-------------------------------------------------------------------------------- # add new Y def removing last row for each patient dat_newY <- merge(dat, Y_tm1_df, by=c(ID_var,t_var), all=F, sort=F) #-------------------------------------------------------------------------------- # Set all censoring, treatment and Y nodes to NA after censoring event #-------------------------------------------------------------------------------- dat_newY[dat_newY[, C[1]]%in%1, c(C[2:3],A,newY)] <- NA # print(head(dat_newY[dat_newY[, C[1]]%in%1, c(C,A,newY)],100)) dat_newY[dat_newY[, C[2]]%in%1, c(C[3],A,newY)] <- NA # print(head(dat_newY[dat_newY[, C[2]]%in%1, c(C,A,newY)],100)) dat_newY[dat_newY[, C[3]]%in%1, c(A,newY)] <- NA # print(head(dat_newY[dat_newY[, C[3]]%in%1, c(C,A,newY)],100)) #-------------------------------------------------------------------------------- # remove obs that appear at or after tmax (keeps t=[0,...,tmax-1]) # keep final var names # convert to wide fmt #-------------------------------------------------------------------------------- dat_tmax <- dat_newY[(dat_newY[,t_var] < tint_max),names(dat_newY)%in%keep_vars] dat_wide <- reshape(data=dat_tmax, v.names=finTD_vars, timevar=t_var, idvar=ID_var, direction='wide', sep="_") # save regimen flags separately (also convert to wide fmt) dat_tmax_regim <- dat_newY[(dat_newY[,t_var]0) { if (ind_1