# -----------------------------------------------------------------------------
# R programming statements for h36brr22 data
#
# This file contains programming statements needed to import the ASCII data 
# file (.dat) into R. The R programming language has the capability to produce
# appropriate standard errors for estimates from a survey with a complex sample 
# design such as the Medical Expenditure Panel Survey (MEPS).
# 
# The input file is the ASCII data file (h36brr22.dat) supplied in this PUF 
# release, which can be extracted from the .zip file supplied at the MEPS 
# website: https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
#
# This code imports the MEPS data into R as a data frame called 'h36brr22'.
#  
# Note that additional packages are needed to successfully run this code. To 
# install these packages, run the 'install.packages' function (shown below). 
# Once installed, the packages can be called using the 'library' function. 
# Packages only need to be installed once, but they must be called using the 
# 'library' function every time a new R session is started.
#
# Two options are available to run this code:
#
#  1. Copy and paste the code into an interactive R session. 
#
#     The user must first download the ASCII (.dat) file from the MEPS website
#     and save it to a local directory, which must be defined in the 
#     'meps_path' variable below. In this example, the local directory is  
#     called "C:/MEPS". Note that the path structure will differ on Mac and PC.  
# 
#
#  2. Call this code directly from an interactive R session. 
#
#     (a) If the ASCII (.dat) file has already been downloaded from the MEPS  
#         website and saved to a local directory, the following code can be run 
#         after re-defining the 'meps_path' variable to point to the location 
#         of the h36brr22.dat file:
#
#           meps_path <- "C:/MEPS/h36brr22.dat"  
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h36brr22/h36brr22ru.txt")
#           head(h36brr22) # view data
#
#
#     (b) Alternatively, the ASCII (.dat) file can be downloaded directly from
#         the MEPS website. The following code can be used to download and 
#         import the h36brr22 data into R without having to manually download, 
#         unzip, and store the file on your local computer.
#
#           url <- "https://meps.ahrq.gov/mepsweb/data_files/pufs/h36brr22dat.zip"
#           download.file(url, temp <- tempfile())
#
#           meps_path <- unzip(temp, exdir = tempdir())
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h36brr22/h36brr22ru.txt")
#
#           unlink(temp)  # Unlink to delete temporary file
#
#           head(h36brr22) # view data
#
# -----------------------------------------------------------------------------


# DEFINE 'meps_path' -----------------------------------------------------------
# 'meps_path' should point to the file path of the ASCII file (h36brr22.dat).
# Here, the 'exists' function checks whether 'meps_path' is already defined, so
# a value assigned before this file is run is not overwritten. This feature is
# useful if calling this file from an external source.

# Use the example location only when 'meps_path' has not been set beforehand.
if (!exists("meps_path")) {
  meps_path <- "C:/MEPS/h36brr22.dat"
}


# INSTALL PACKAGES ------------------------------------------------------------
# Uncomment and run this portion if packages are not yet installed
#
# install.packages("readr")


# LOAD PACKAGES ---------------------------------------------------------------
# Run this for every new R session

library(readr)


# DATA FILE INFO --------------------------------------------------------------

# Start and end character positions of each field in the fixed-width file.
# The first four fields span columns 1-7, 8-10, 11-20, and 21-22; each of the
# remaining 128 fields is exactly one character wide, covering columns 23
# through 150.

pos_start <- c(1, 8, 11, 21, 23:150)

pos_end <- c(7, 10, 20, 22, 23:150)

# Column names and their type codes ('c' = character, 'n' = numeric).
# The four leading columns are DUID, PID, DUPERSID, and PANEL; they are
# followed by the 128 columns BRR1 through BRR128. DUPERSID is the only
# column typed as character; all others are typed as numeric.

var_names <- c("DUID", "PID", "DUPERSID", "PANEL", paste0("BRR", 1:128))

var_types <- rep("n", length(var_names))
var_types[var_names == "DUPERSID"] <- "c"

# Label each type code with the name of the column it belongs to
names(var_types) <- var_names

# IMPORT ASCII (.dat) file ----------------------------------------------------

# Read the fixed-width file at 'meps_path' into 'h36brr22'. Fields are cut at
# the positions given by 'pos_start'/'pos_end', named from 'var_names', and
# parsed using the type codes in 'var_types'.
h36brr22 <- read_fwf(
  file = meps_path,
  col_positions = fwf_positions(pos_start, pos_end, col_names = var_names),
  col_types = var_types
)


# OPTIONAL: save as .Rdata file for easier loading ----------------------------
# Run this to save a permanent .Rdata file in the local working directory
#
# save(h36brr22, file = "h36brr22.Rdata")

# -----------------------------------------------------------------------------
# NOTES:
# 
#  1. This program has been tested on R version 3.6.0
#
#  2. This program will create a temporary data frame in R called 'h36brr22'.
#     You must run the 'save' command to permanently save the data to a local
#     folder
# -----------------------------------------------------------------------------