#### functions for p-curve ES. Code adapted from Uri Simonsohn ####

######################################
# pcurve_loss function
# this code mirrors the functionality of the original loss function
# written by Simonsohn et al.
pcurve_loss <- function(pc_data, dobs) {
  options(warn=-1)
  t.sig <- pc_data$t_obs
  df.sig <- pc_data$df_obs
  ncp_est <- sqrt((df.sig+2)/4)*dobs
  tc <- qt(.975, df.sig)
  power_est <- 1-pt(tc, df.sig, ncp_est)
  p_larger <- pt(t.sig, df=df.sig, ncp=ncp_est)
  ppr <- (p_larger-(1-power_est))/power_est

  # Problem: ks.test gives an error if the number of test statistics is small and
  # bootstrapping selects a weird sample. In case of errors, return a large loss value
  KSD <- tryCatch({
    ks.test(ppr, punif)$statistic
  }, error = function(e) {
    return(1e10)  # return a large loss function value
  })

  # print progression of loss function
  #cat(paste0("dobs=", round(dobs, 3), "; loss=", round(KSD, 3), "\n"))
  options(warn=0)
  return(KSD)
}

##########################
# pcurveEst is the function that should be called to provide p-curve
# estimates of effect size, in addition to bootstrapped confidence intervals
# (if desired). See parameters below for details.
#
#' @param t t-values
#' @param df degrees of freedom
#' @param CI Should the CI be computed? (Needs bootstrapping; takes long)
#' @param level The coverage of the CI (default: 95%)
#' @param B Number of bootstrap samples for CI
#' @param progress Should a progress bar be displayed for the CI bootstrapping?
#' @param long Should the results be returned in long format?
pcurveEst <- function(t, df, CI=TRUE, level=.95, B=1000, progress=TRUE, long=TRUE) {
  require(dplyr)  # dplyr is needed for the filter calls in pcurve_prep

  out <- matrix(NA, 1, 3)
  colnames(out) <- c("dPcurve", "lbPcurve", "ubPcurve")

  # define dmin and dmax (the range of the parameter search)
  dmin <- 0
  dmax <- 4

  # pcurve_prep is called first to assemble the data into a data.frame and run the validity checks
  pc_data <- pcurve_prep(t_obs = t, df_obs = df)

  # next, check that at least one study survived the filtering; if not, return an NA estimate
  if (nrow(pc_data) == 0) {
    outlong <- data.frame(method="pcurve", term="b0", variable=c("estimate"), value=NA)
    return(outlong)
  }

  # now get the p-curve ES estimate
  #out[1, 1] <- optim(par=0, fn=pcurve_loss, pc_data = pc_data, method="BFGS")$par
  # Update from ARawles - limits the search to positive effect sizes
  # (which is intended by p-curve anyway)
  out[1, 1] <- optimize(pcurve_loss, interval = c(dmin, dmax), pc_data = pc_data)$minimum

  if (CI == TRUE) {
    warning("CI not properly implemented.")
    #d.boot <- pcurve_estimate_d_CI(pc_data = pc_data, dmin=dmin, dmax=dmax, B=B, progress=progress)
    #CI.est <- quantile(d.boot, prob=c((1-level)/2, 1-(1-level)/2))
    #out[1, 2:3] <- CI.est
  }

  if (long == FALSE) {
    return(out)
  } else {
    outlong <- data.frame(method="pcurve", term="b0",
                          variable=c("estimate", "conf.low", "conf.high"),
                          value=out[1, ])
    # add the number of significant studies that entered the p-curve
    outlong <- plyr::rbind.fill(outlong, data.frame(
      method="pcurve",
      term="kSig",
      variable="estimate",
      value=nrow(pc_data)
    ))
    rownames(outlong) <- NULL
    return(outlong)
  }
}
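######################################
# Illustrative usage (a minimal sketch, not part of the original code).
# The t-values and degrees of freedom below are hypothetical and serve only to
# demonstrate the call; dplyr and plyr must be installed.
# sim_t  <- c(2.3, 2.8, 2.1, 3.4, 2.6)   # hypothetical significant t-values
# sim_df <- c(48, 60, 38, 52, 44)        # hypothetical degrees of freedom
# pcurveEst(t = sim_t, df = sim_df, CI = FALSE)
# # returns a long-format data.frame with the p-curve d estimate (term "b0")
# # and the number of significant studies entering the curve (term "kSig")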
##########################
# pcurve_prep takes in vectors of t-values and associated degrees of freedom,
# packages everything into a data.frame,
# strips out anything with p > .05,
# and also strips out any negative t-values.
# All pre-processing and validation should go in here.
pcurve_prep <- function(t_obs, df_obs) {
  # first, calculate p-values and d-values for all studies
  d_vals <- (t_obs*2)/sqrt(df_obs)
  p_vals <- (1 - pt(t_obs, df_obs)) * 2

  # then, put everything into a data.frame to keep things organized
  unfiltered_data <- data.frame(t_obs, df_obs, p_vals, d_vals)

  # now for the checks.
  # strip out anything that is NS, or if t < 0.
  # Note that we're not throwing any warnings here.
  unfiltered_data <- dplyr::filter(unfiltered_data, t_obs > 0)
  clean_data <- dplyr::filter(unfiltered_data, p_vals < .05)

  # all done!
  return(clean_data)
}

######################################
# pcurve_estimate_d_CI obtains bootstrapped resamples of the provided dataset
# and estimates the ES of every resample using p-curve.
pcurve_estimate_d_CI <- function(pc_data, dmin, dmax, B, progress=TRUE) {
  d.boot <- c()
  if (progress == TRUE) {
    require(progress)
    pb <- progress_bar$new(format="Bootstrapping [:bar] :percent ETA: :eta", total=B, clear=FALSE)
  }
  for (i in 1:B) {
    if (progress == TRUE) pb$tick()
    # get a random resample, with replacement
    # note that sample() doesn't work here; it is necessary to use sample_n()
    resample_data <- sample_n(pc_data, nrow(pc_data), replace=TRUE)
    #print(resample_data)
    d.boot <- c(d.boot, optimize(pcurve_loss, c(dmin, dmax), pc_data = resample_data)$minimum)
  }
  return(d.boot)
}

# set.seed(1)
# dat <- dataMA(k = 10, delta = -0.7, tau = 0, empN = TRUE, maxN=500, minN=0, meanN=0, selProp = 0, qrpEnv = "none")
# dat <- data.frame(dat)

# # test: biased set of studies
# dat <- data.frame(dataMA(k = 40, delta = 0.15, tau = 0.05, empN = TRUE, maxN=500, minN=0, meanN=0, selProp = 0.9, qrpEnv = "low"))
# pcurveEst(dat$t, dat$n1 + dat$n2 - 2, CI=FALSE)
# pc_skew(dat$t, dat$n1 + dat$n2 - 2)

# Compare with p-curve.com
# cat(paste(paste0("t(", dat$n1 + dat$n2 - 2, ")=", round(dat$t, 4)), collapse="\n"))

#TPSM.est(dat$t, dat$n1, dat$n2)
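######################################
# Illustrative sketch (not part of the original code): inspecting the
# KS-distance loss returned by pcurve_loss over a grid of candidate d values.
# The t-values and dfs are hypothetical; the p-curve estimate is the d that
# minimizes this curve, which is what optimize() finds inside pcurveEst.
# sim_t  <- c(2.3, 2.8, 2.1, 3.4, 2.6)
# sim_df <- c(48, 60, 38, 52, 44)
# pc_data <- pcurve_prep(t_obs = sim_t, df_obs = sim_df)
# d_grid  <- seq(0, 1.5, by = 0.05)
# loss    <- sapply(d_grid, function(d) pcurve_loss(pc_data, dobs = d))
# plot(d_grid, loss, type = "l", xlab = "candidate d", ylab = "KS distance")
# d_grid[which.min(loss)]   # grid approximation to the p-curve estimate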