---
title: "Crunching Function for XlsForm"
output: rmarkdown::html_vignette
graphics::title: "Developing and documenting all functions"
vignette: >
  %\VignetteIndexEntry{crunching-function-for-xlsform}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
```

```{r setup}
library(kobocruncher)
```

# Data examples to demo the package

# Preparing objects

## Data loading

```{r examples-kobo_data}
# Load the demo dataset shipped with the package
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

# Main frame
datalist[["main"]]

# Second frame - based on presence of repeat within the form, aka nested or
# hierarchical data structure, etc...
datalist[["members"]]
```

## Extend the xlsform to add instructions for the analysis plan

Now we can extend the xlsform that was used to document key next steps in the data preparation.

```{r examples-kobo_prepare_form}
kobo_prepare_form(
  xlsformpath = system.file("form.xlsx", package = "kobocruncher"),
  xlsformpathout = NULL,
  label_language = ""
)
```

## Prepare data dictionary

```{r examples-kobo_dico}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)

# Survey
questions <- as.data.frame(dico[["variables"]])
knitr::kable(utils::head(questions, 10))

# Choices
responses <- as.data.frame(dico[["modalities"]])
knitr::kable(utils::head(responses, 10))

# Settings
metadata <- as.data.frame(dico[["settings"]])
knitr::kable(utils::head(metadata, 10))

# Report ToC
toc <- as.data.frame(dico[["plan"]])
knitr::kable(utils::head(toc, 10))

# Indicator
indicator <- as.data.frame(dico[["indicator"]])
knitr::kable(utils::head(indicator, 10))
```

# Data Processing

## Indicator Calculation

Indicator calculation

```{r examples-kobo_indicator}
# Input form shipped with the package; the extended form is written to the
# session temporary directory so the example leaves no file behind in the
# working directory.
xlsformpath <- system.file("sample_xlsform.xlsx", package = "kobocruncher")
xlsformpathout <- file.path(tempdir(), "sample_xlsform_withindic.xlsx")

# Reuse the path defined above rather than resolving it a second time
dico <- kobo_dico(xlsformpath = xlsformpath)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

## Check if we add no indicator
expanded <- kobo_indicator(
  datalist = datalist,
  dico = dico,
  indicatoradd = NULL,
  xlsformpath = xlsformpath,
  xlsformpathout = xlsformpathout
)

## Example 1: Simple dummy filter
# The calculation is passed as text, hence the escaped quotes.
indicatoradd <- c(
  name = "inColombia",
  type = "select_one",
  label = "Is from Colombia",
  repeatvar = "main",
  calculation = "dplyr::if_else(datalist[[\"main\"]]$profile.country == \"COL\", \"yes\",\"no\")"
)
expanded <- kobo_indicator(
  datalist = datalist,
  dico = dico,
  indicatoradd = indicatoradd,
  xlsformpath = xlsformpath,
  xlsformpathout = xlsformpathout
)

## Replace existing
dico <- expanded[["dico"]]
datalist <- expanded[["datalist"]]

## Check my new indicator
table(datalist[[1]]$inColombia, useNA = "ifany")

## Example 2: calculation on nested elements and build an indicator list
# NOTE(review): tidyr::gather()/spread() are superseded by
# pivot_longer()/pivot_wider(); the expression is kept verbatim because it is
# handed to kobo_indicator() as text - confirm before modernising it.
indicatoradd2 <- c(
  name = "hasfemalemembers",
  type = "select_one",
  label = "HH has female members ",
  repeatvar = "main",
  calculation = "datalist[[\"members\"]] |> dplyr::select( members.sex, parent_index) |> tidyr::gather( parent_index, members.sex) |> dplyr::count(parent_index, members.sex) |> tidyr::spread(members.sex, n, fill = 0) |> dplyr::select( female)"
)

# Several indicators are supplied together as a list
indicatorall <- list(indicatoradd, indicatoradd2)

expanded <- kobo_indicator(
  datalist = datalist,
  dico = dico,
  indicatoradd = indicatorall,
  xlsformpath = xlsformpath,
  xlsformpathout = xlsformpathout
)

## Replace existing
dico <- expanded[["dico"]]
datalist <- expanded[["datalist"]]

## Check my new indicator
table(datalist[[1]]$hasfemalemembers, useNA = "ifany")

# Example of calculations:
#
# 1. Create a filter on specific criteria
#  'dplyr::if_else(datalist[["main"]]$variable =="criteria", "yes","no")'
#
#
# 2. Ratio between 2 numeric variables
#  'datalist[["main"]]$varnum1 / datalist[["main"]]$varnum2'
#
#
# 3. Calculation on date - time between a date and now, calculated in months
#  'lubridate::interval( datalist[["main"]]$datetocheck,
#                        lubridate::today()) %/% months(1)'
#
# 4. Discretization of numeric variable according to quintile
#  'Hmisc::cut2(datalist[["main"]]$varnum, g =5)'
#
# 5. Discretization of numeric variable according to fixed break -
#    for instance case size from integer to categoric
#  'cut(datalist[["main"]]$casesize, breaks = c(0, 1, 2, 3,5,30),
#       labels = c("Case.size.1", "Case.size.2", "Case.size.3",
#                  "Case.size.4.5", "Case.size.6.or.more" ), include.lowest=TRUE)'
#
# 6. Aggregate variable from nested frame (aka within repeat) to parent table
#  'datalist[["members"]] |>
#     dplyr::select( members.sex, parent_index) |>
#     tidyr::gather( parent_index, members.sex) |>
#     dplyr::count(parent_index, members.sex) |>
#     tidyr::spread(members.sex, n, fill = 0) |>
#     dplyr::select( female)'
```

## Weight the dataset

```{r example-kobo_weight}
#kobo_weight()
```

## Assess Disclosure Risk

to do....
```{r examples-kobo_anonymise}
# dico <- kobo_dico( xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher") )
# datalist <- kobo_data(datapath = system.file("data.xlsx", package = "kobocruncher") )
#
# kobo_anonymise(datalist = datalist,
#                dico = dico,
#                indicatoradd = indicatoradd )
```

# Labeling functions

## Get the correct frame for a specific variable

```{r examples-kobo_frame}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

# "members.sex" is requested here; the returned frame is previewed below
data <- kobo_frame(
  datalist = datalist,
  dico = dico,
  var = "members.sex"
)
knitr::kable(utils::head(data, 5))
```

## Get the label for a specific variable

```{r examples-label_varname}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
label_varname(dico = dico, x = "profile.country")
```

## Get interpretation hint for a specific variable

```{r examples-label_varhint}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
label_varhint(dico = dico, x = "profile.country")
```

## Get all the choices labels options for a specific variable

```{r examples-label_choiceset}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)
data <- kobo_frame(
  datalist = datalist,
  dico = dico,
  var = "profile.country"
)

# label_choiceset() returns a function, which is then applied to the values
label_choiceset(dico = dico, x = "profile.country")(data$profile.country)

## Test when there's no dictionary
data$profile.occupation
label_choiceset(dico = dico, x = "profile.occupation")(data$profile.occupation)
```

# Plotting Functions

## Univariate

## Plotting Select one variable

```{r examples-plot_select_one}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_select_one(
  datalist = datalist,
  dico = dico,
  var = "profile.country",
  showcode = TRUE
)

## Example with lumping
plot_select_one(
  datalist = datalist,
  dico = dico,
  var = "profile.country",
  n = 1,
  showcode = TRUE
)

# plot_select_one(datalist = datalist,
#                 dico = dico,
#                 var = "profile.countryerror",
#                 showcode = TRUE)
```

## Plotting Select multiple variable

```{r examples-plot_select_multiple}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_select_multiple(
  datalist = datalist,
  dico = dico,
  var = "profile.reason",
  datasource = NULL,
  showcode = TRUE
)

## Displaying the usage of the lumping option..
plot_select_multiple(
  datalist = datalist,
  dico = dico,
  var = "profile.reason",
  n = 5,
  datasource = NULL,
  showcode = TRUE
)

# plot_select_multiple(datalist = datalist,
#                      dico = dico,
#                      var = "profile.reason1",
#                      showcode = TRUE
#                      )
```

## Plotting Numeric variable

```{r examples-plot_integer}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_integer(
  datalist = datalist,
  dico = dico,
  var = "members.age",
  showcode = TRUE
)
```

## Plotting Open Text variable

```{r examples-plot_text}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_text(
  datalist = datalist,
  dico = dico,
  var = "profile.occupation",
  showcode = TRUE
)
```

## Bivariate

## Plotting Select one variable with cross tabulation

```{r examples-plot_select_one_cross}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_select_one_cross(
  datalist = datalist,
  dico = dico,
  var = "profile.country",
  by_var = "profile.occupation",
  showcode = TRUE
)

## Test when the variables are not in the same frame...
plot_select_one_cross(
  datalist = datalist,
  dico = dico,
  var = "profile.country",
  by_var = "members.sex",
  n = 5,
  n_by = 5,
  showcode = TRUE
)
```

## Plotting Select multiple variable with cross-tabulation

```{r examples-plot_select_multiple_cross}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_select_multiple_cross(
  datalist = datalist,
  dico = dico,
  var = "profile.reason",
  by_var = "location",
  showcode = TRUE
)

## Test lumping
plot_select_multiple_cross(
  datalist = datalist,
  dico = dico,
  var = "profile.reason",
  by_var = "location",
  n = 4,
  showcode = TRUE
)
```

## Plotting Numeric variable with cross-tabulation

```{r examples-plot_integer_cross}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_integer_cross(
  datalist = datalist,
  dico = dico,
  var = "members.age",
  by_var = "members.sex",
  showcode = TRUE
)
```

## Plotting Correlation

```{r examples-plot_correlation}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

plot_correlation(
  datalist = datalist,
  dico = dico,
  var = "profile.occupation",
  by_var = "profile.country",
  datasource = NULL
)
```

## Multivariate

## Plotting Likert

```{r examples-plot_likert}
# A dedicated Likert form and dataset are used for this example
dicolikert <- kobo_dico(
  xlsformpath = system.file("form_likert.xlsx", package = "kobocruncher")
)
datalistlikert <- kobo_data(
  datapath = system.file("data_likert.xlsx", package = "kobocruncher")
)

plot_likert(
  datalist = datalistlikert,
  dico = dicolikert,
  datasource = NULL,
  scopei = "group_ei8jz33",
  repeatvari = "main",
  ## getting the list_name and corresponding label
  list_namei = "yk0td68"
)
```

## Plotting clusters

to do....

## Plotting prediction

to do....

## Plotting scores

to do....

## Plotting Header variable

```{r examples-plot_header}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)

plot_header(dico = dico, var = "profile.profile")

# class(plot_header( dico = dico,
#                  var = "profile.profile"))
# dput(plot_header( dico = dico, var = "profile.profile"))
# message(plot_header( dico = dico, var = "profile.profile"))
cat(plot_header(dico = dico, var = "profile.profile"))

print(plot_header(dico = dico, var = "profile.profile"), useSource = FALSE)
```

# Report generation

## Crunching Variables based on a plan

```{r examples-kobo_cruncher}
dico <- kobo_dico(
  xlsformpath = system.file("sample_xlsform.xlsx", package = "kobocruncher")
)
datalist <- kobo_data(
  datapath = system.file("data.xlsx", package = "kobocruncher")
)

kobo_cruncher(
  datalist = datalist,
  dico = dico,
  datasource = "a great survey!"
)
```

## Crunching Likert components

```{r examples-kobo_likert}
dicolikert <- kobo_dico(
  xlsformpath = system.file("form_likert.xlsx", package = "kobocruncher")
)
datalistlikert <- kobo_data(
  datapath = system.file("data_likert.xlsx", package = "kobocruncher")
)

kobo_likert(
  datalist = datalistlikert,
  dico = dicolikert,
  datasource = "a great survey!"
)
```

## Archive files in RIDL

```{r example-kobo_ridl}
### Example used for each template
## Time to archive your work once done!!
# namethisfile = basename(rstudioapi::getSourceEditorContext()$path ) # if( params$publish == "yes"){ # kobo_ridl(ridl = params$ridl, # datafolder = params$datafolder, # form = params$form, # namethisfile = namethisfile , # visibility = params$visibility, # stage = params$stage) } ``` ## Report Template A for Automatic Data Exploration ```{r example-template_1_exploration} # template_1_exploration(datafolder= "data-raw", # ridl = "ridlproject", # data = "data.xlsx" , # form = "form.xlsx", # datasource = "Study name reference", # publish = "no", # republish = "no", # visibility = "public", # stage = "exploration_initial", # language = "", # folder = "Report") ``` ```{r template_1_exploration} ``` ## Report Template B for Joint Data Interpretation Session The second template is used following the systematic data exploration. It will generate a PowerPoint presentation. See a more detailed presentation of that step here: https://www.youtube.com/watch?v=0jE-Y7g88K4&feature=youtu.be&t=2305 ```{r template_2_interpretation} #' Second Template to prepare a presentation for the Joint Data Interpretation Session #' # usethis::use_rmarkdown_template( # template_name = "template_2_interpretation", # template_dir = NULL, # template_description = "Joint Data Interpretation", # template_create_dir = TRUE # ) ``` ## Report Template C for Note taking The third template can be used in a similar way to the presentation template. It will generate a Word document in order to take notes. An automatic table of contents is generated but might need to be refreshed after the Word document is created. ```{r template_3_note} #' Report Template C for Note taking #' The third template can be used to take notes during the data interpretation session. 
#' It will generate a Word document # usethis::use_rmarkdown_template( # template_name = "template_C_notes", # template_dir = NULL, # template_description = "Note taking", # template_create_dir = TRUE # ) ``` ## Report Template D for Dissemination and Data Story Telling Template The last template can be used to build the final report. It includes some instructions and guidance on how to organize the content to increase your audience. It will generate a PDF or a paginated HTML page. ```{r template_4_dissemination} #' Report Template D for Dissemination and Data Story Telling #' The last template can be used to build the final report. #' It will generate a PDF or a paginated HTML page # usethis::use_rmarkdown_template( # template_name = "template_D_dissemination", # template_dir = NULL, # template_description = "Data brief and Story Telling", # template_create_dir = TRUE # ) ``` ## run_app ```{r example-run_app} # run_app() ```