|
| 1 | +#' @export |
| 2 | +#' |
| 3 | +#' @author Rachel Carroll <rachelcarroll4@@gmail.com> |
| 4 | +#' @author Loren Collingwood <lcollingwood@@unm.edu> |
| 5 | +#' |
| 6 | +#' @title Normalize RPV results |
| 7 | +#' |
| 8 | +#' @description Create a dataframe of normalized RPV results when using the |
| 9 | +#' cvap, vap, or bisg denominator method, i.e., take RPV results only among |
| 10 | +#' people estimated to have voted. |
| 11 | +#' |
| 12 | +#' @param ei_object Output from \code{ei_iter()} or \code{ei_rxc()} |
| 13 | +#' @param cand_cols A character vector of the candidate column names to be |
| 14 | +#' normalized from \code{ei_object}. Only use candidate column name columns, |
| 15 | +#' not the No Vote column. |
| 16 | +#' @param race_cols A character vector of the racial group column names to |
| 17 | +#' be normalized from \code{ei_object} |
| 18 | +#' |
| 19 | +#' @return Normalized RPV results in a data.frame |
| 20 | +#' @examples |
| 21 | +#' \donttest{ |
| 22 | +#' #library(eiCompare) |
| 23 | +#' #data("south_carolina") |
| 24 | +#' #prec_election_demog <- south_carolina[1:50,] |
| 25 | +#' |
| 26 | +#' ## run rpv using eiCompare rxc method |
| 27 | +#' #rxcVote <- ei_rxc( |
| 28 | +#' # data = prec_election_demog, |
| 29 | +#' # cand_cols = c('pct_mcmaster', 'pct_smith', 'pct_other_gov', 'pct_NoVote_gov'), |
| 30 | +#' # race_cols = c('pct_white', 'pct_black', 'pct_race_other'), |
| 31 | +#' # totals_col = "total_vap") |
| 32 | +#' |
| 33 | +#' ## normalize results accounting for no vote using rpv_normalize() |
| 34 | +#' ## only include the candidate and race cols of interest for the rpv analysis |
| 35 | +#' #rpv_results <- rpv_normalize( |
| 36 | +#' # ei_object = rxcVote, |
| 37 | +#' # cand_cols = c('pct_mcmaster', 'pct_smith', 'pct_other_gov'), |
| 38 | +#' # race_cols = c('pct_white', 'pct_black') |
| 39 | +#' #) |
| 40 | +#' } |
| 41 | + |
| 42 | +rpv_normalize <- function(ei_object, cand_cols, race_cols){ |
| 43 | + |
| 44 | + #---------------------------- QC -----------------------------------# |
| 45 | + |
| 46 | + # Make sure ei_object is correct class |
| 47 | + if( !inherits(ei_object, "eiCompare") ) { |
| 48 | + stop("ei_object must be an eiCompare output from ei_iter() or ei_rxc()") |
| 49 | + } |
| 50 | + # Make sure all cand_cols are in the ei_object |
| 51 | + canddiff <- setdiff( |
| 52 | + cand_cols, |
| 53 | + unique(ei_object$estimates$cand) |
| 54 | + ) |
| 55 | + |
| 56 | + if( length(canddiff) > 0 ) { |
| 57 | + stop("cand_cols values are not found in ei_object") |
| 58 | + } |
| 59 | + |
| 60 | + # Make sure all race_cols are in the ei_object |
| 61 | + racediff <- setdiff( |
| 62 | + race_cols, |
| 63 | + unique(ei_object$estimates$race) |
| 64 | + ) |
| 65 | + |
| 66 | + if( length(racediff) > 0 ) { |
| 67 | + stop("race_cols values are not found in ei_object") |
| 68 | + } |
| 69 | + |
| 70 | + #----------------------- Helper Function -----------------------------# |
| 71 | + # Create calculation helper function (used in lapply below) |
| 72 | + # candNm = name of cand col in samplesDF (an element from cand_cols ) |
| 73 | + # raceNm = name of race/demographic samples contained in samplesDF (an |
| 74 | + # elements from race_cols) |
| 75 | + # samplesDF = data.frame with vote samples from eiCompare model for each |
| 76 | + # candidate in cand_cols and sums across all candidates. The samples |
| 77 | + # in samplesDF will be associated with a single race from race_cols |
| 78 | + |
| 79 | + sample_calcs <- function(candNm, raceNm, samplesDF){ |
| 80 | + |
| 81 | + # get share estimate |
| 82 | + share.est <- samplesDF %>% |
| 83 | + dplyr::pull(candNm)/samplesDF$sum |
| 84 | + |
| 85 | + # get average |
| 86 | + mean <- mean(share.est) |
| 87 | + # get lower and upper bounds |
| 88 | + quantile <- quantile(share.est, c(.025, .975)) |
| 89 | + |
| 90 | + # compile results into a data.frame row |
| 91 | + row <- data.frame("mean" = round(mean * 100,2), |
| 92 | + "ci_95_lower" = round(quantile[1] * 100, 2), |
| 93 | + "ci_95_upper" = round(quantile[2] * 100, 2)) |
| 94 | + |
| 95 | + # set col and row names |
| 96 | + colnames(row) <- paste(raceNm, colnames(row), sep = ".") |
| 97 | + rownames(row) <- candNm |
| 98 | + |
| 99 | + # return results |
| 100 | + return(row) |
| 101 | + } |
| 102 | + |
| 103 | + #------------------ Calculate Normalized RPV Results --------------------# |
| 104 | + |
| 105 | + # Initiate lists/vars |
| 106 | + samples <- list() |
| 107 | + results <- list() |
| 108 | + ncands <- length(cand_cols) |
| 109 | + |
| 110 | + # loop through race |
| 111 | + for ( j in seq(length(race_cols))) { |
| 112 | + |
| 113 | + # store race name |
| 114 | + race_j_name <- race_cols[j] |
| 115 | + |
| 116 | + # get eiCompare model samples for each cand |
| 117 | + for ( i in seq(ncands) ) { |
| 118 | + # vote samples |
| 119 | + samples[[race_j_name]][[cand_cols[i]]] <- |
| 120 | + ei_object$district_samples[[paste(cand_cols[i], race_j_name, sep="_")]] |
| 121 | + } |
| 122 | + |
| 123 | + # create df of samples of all cands for a given demographic preference |
| 124 | + race_j_samples <- samples[[j]] |
| 125 | + samplesDF <- dplyr::bind_cols(race_j_samples) |
| 126 | + # sum across all cand fields |
| 127 | + samplesDF$sum <- as.numeric(apply(samplesDF[,1:ncands], 1, sum)) |
| 128 | + |
| 129 | + # use helper function to get normalized point estimates and lower/upper bounds |
| 130 | + resultsList <- lapply(cand_cols, sample_calcs, |
| 131 | + raceNm = race_j_name, samplesDF = samplesDF) |
| 132 | + |
| 133 | + # store results |
| 134 | + results[[j]] <- dplyr::bind_rows(resultsList) |
| 135 | + } |
| 136 | + |
| 137 | + # combine results into one dataframe |
| 138 | + out <- dplyr::bind_cols(results) |
| 139 | + |
| 140 | + #------------------------ Print and Return --------------------------# |
| 141 | + |
| 142 | + #return as object |
| 143 | + return(out) |
| 144 | + |
| 145 | +} |
| 146 | + |
0 commit comments