# -----------------------------------------------------------------------------
# R programming statements for h36u22 data
#
# This file contains programming statements needed to import the ASCII data 
# file (.dat) into R. The R programming language has the capability to produce
# appropriate standard errors for estimates from a survey with a complex sample 
# design such as the Medical Expenditure Panel Survey (MEPS).
# 
# The input file is the ASCII data file (h36u22.dat) supplied in this PUF 
# release, which can be extracted from the .zip file supplied at the MEPS 
# website: https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
#
# This code imports the MEPS data into R as a data frame called 'h36u22'.
#  
# Note that additional packages are needed to successfully run this code. To 
# install these packages, run the 'install.packages' function (shown below). 
# Once installed, the packages can be called using the 'library' function. 
# Packages only need to be installed once, but they must be called using the 
# 'library' function every time a new R session is started.
#
# Two options are available to run this code:
#
#  1. Copy and paste the code into an interactive R session. 
#
#     The user must first download the ASCII (.dat) file from the MEPS website
#     and save it to a local directory. The 'meps_path' variable below must
#     then be set to the full path of that file. In this example, the file is
#     saved in "C:/MEPS", so 'meps_path' is "C:/MEPS/h36u22.dat". Note that
#     the path structure will differ on Mac and PC.
# 
#
#  2. Call this code directly from an interactive R session. 
#
#     (a) If the ASCII (.dat) file has already been downloaded from the MEPS
#         website and saved to a local directory, the following code can be
#         run after re-defining the 'meps_path' variable to point to the
#         location of the h36u22.dat file:
#
#           meps_path <- "C:/MEPS/h36u22.dat"  
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h36u22/h36u22ru.txt")
#           head(h36u22) # view data
#
#
#     (b) Alternatively, the ASCII (.dat) file can be downloaded directly from
#         the MEPS website. The following code can be used to download and 
#         import the h36u22 data into R without having to manually download, 
#         unzip, and store the file on your local computer.
#
#           url <- "https://meps.ahrq.gov/mepsweb/data_files/pufs/h36u22dat.zip"
#           download.file(url, temp <- tempfile())
#
#           meps_path <- unzip(temp, exdir = tempdir())
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h36u22/h36u22ru.txt")
#
#           unlink(temp)  # Unlink to delete temporary file
#
#           head(h36u22) # view data
#
# -----------------------------------------------------------------------------


# DEFINE 'meps_path' -----------------------------------------------------------
# 'meps_path' must hold the full path to the ASCII data file (h36u22.dat).
# A default is assigned only when no object named 'meps_path' already exists,
# so a value set by the caller before sourcing this file is left untouched.

if (!exists("meps_path")) {
  meps_path <- "C:/MEPS/h36u22.dat"
}


# INSTALL PACKAGES ------------------------------------------------------------
# Uncomment and run this portion if packages are not yet installed
#
# install.packages("readr")


# LOAD PACKAGES ---------------------------------------------------------------
# Run this for every new R session

library(readr)


# DATA FILE INFO --------------------------------------------------------------

# Column layout of the fixed-width file: first and last character position of
# each of the 33 fields, listed in file order

pos_start <- c(
  1, 8, 11, 21,  # DUID, PID, DUPERSID, PANEL
  23:50,         # INH12 ... INH243 (one character each), then STRA9622
  54             # PSU9622
)

pos_end <- c(
  7, 10, 20, 22,  # DUID, PID, DUPERSID, PANEL
  23:49,          # INH12 ... INH243 (one character each)
  53, 54          # STRA9622, PSU9622
)

# Variable names, in the same order as the positions above

var_names <- c(
  "DUID", "PID", "DUPERSID", "PANEL",
  paste0(
    "INH",
    c(12, 20, 28, 38, 50, 60, 70, 79, 89, 97, 105, 113, 121, 129,
      138, 147, 155, 163, 171, 181, 192, 201, 209, 216, 224, 233, 243)
  ),
  "STRA9622", "PSU9622"
)

# Variable types ('c' = character, 'n' = numeric), named by variable.
# Every field is numeric except DUPERSID, which is read as character.

var_types <- rep("n", length(var_names))
var_types[var_names == "DUPERSID"] <- "c"
names(var_types) <- var_names

# IMPORT ASCII (.dat) file ----------------------------------------------------
# Read the fixed-width file at 'meps_path' into the data frame 'h36u22', using
# the column positions, names, and types defined above.

h36u22 <- read_fwf(
  file = meps_path,
  col_positions = fwf_positions(pos_start, pos_end, var_names),
  col_types = var_types
)


# OPTIONAL: save as .Rdata file for easier loading ----------------------------
# Run this to save a permanent .Rdata file in the local working directory
#
# save(h36u22, file = "h36u22.Rdata")

# -----------------------------------------------------------------------------
# NOTES:
# 
#  1. This program has been tested on R version 3.6.0
#
#  2. This program creates a data frame called 'h36u22' that exists only for
#     the current R session. Run the 'save' command (see above) to store the
#     data permanently in a local folder.
# -----------------------------------------------------------------------------