# -----------------------------------------------------------------------------
# R programming statements for h208 data
#
# This file contains programming statements needed to import the ASCII data 
# file (.dat) into R. The R programming language has the capability to produce
# appropriate standard errors for estimates from a survey with a complex sample 
# design such as the Medical Expenditure Panel Survey (MEPS).
# 
# The input file is the ASCII data file (h208.dat) supplied in this PUF 
# release, which can be extracted from the .zip file supplied at the MEPS 
# website: https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
#
# This code imports the MEPS data into R as a data frame called 'h208'.
#  
# Note that additional packages are needed to successfully run this code. To 
# install these packages, run the 'install.packages' function (shown below). 
# Once installed, the packages can be called using the 'library' function. 
# Packages only need to be installed once, but they must be called using the 
# 'library' function every time a new R session is started.
#
# Two options are available to run this code:
#
#  1. Copy and paste the code into an interactive R session. 
#
#     The user must first download the ASCII (.dat) file from the MEPS website
#     and save it to a local directory, which must be defined in the 
#     'meps_path' variable below. In this example, the local directory is  
#     called "C:/MEPS". Note that the path structure will differ on Mac and PC.  
# 
#
#  2. Call this code directly from an interactive R session. 
#
#     (a) If the ASCII (.dat) file has already been downloaded from the MEPS  
#         website and saved to a local directory, the following code can be run 
#         (after re-defining the 'meps_path' variable to point to the location 
#         of the h208.dat file.)
#
#           meps_path <- "C:/MEPS/h208.dat"  
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h208/h208ru.txt")
#           head(h208) # view data
#
#
#     (b) Alternatively, the ASCII (.dat) file can be downloaded directly from
#         the MEPS website. The following code can be used to download and 
#         import the h208 data into R without having to manually download, 
#         unzip, and store the file on your local computer.
#
#           url <- "https://meps.ahrq.gov/mepsweb/data_files/pufs/h208dat.zip"
#           download.file(url, temp <- tempfile())
#
#           meps_path <- unzip(temp, exdir = tempdir())
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h208/h208ru.txt")
#
#           unlink(temp)  # Unlink to delete temporary file
#
#           head(h208) # view data
#
# -----------------------------------------------------------------------------


# DEFINE 'meps_path' -----------------------------------------------------------
# 'meps_path' should be the path to the ASCII file (h208.dat).
# Here, the 'exists' function checks whether meps_path is already defined. This
# feature is useful if calling this file from an external source.

# Keep any 'meps_path' that already exists (e.g. set by a calling script);
# otherwise fall back to the example location.
if (!exists("meps_path")) {
  meps_path <- "C:/MEPS/h208.dat"
}


# INSTALL PACKAGES ------------------------------------------------------------
# Uncomment and run this portion if packages are not yet installed
#
# install.packages("readr")


# LOAD PACKAGES ---------------------------------------------------------------
# Run this for every new R session

library(readr)


# DATA FILE INFO --------------------------------------------------------------

# Define start and end positions to read fixed-width file 

# First character position of each of the 77 fixed-width fields, listed in
# the same order as 'var_names'.
pos_start <- c(
  # Fields 1-14
  1, 36, 46, 56, 67, 92, 106, 134, 142, 150, 161, 181,
  192, 194,
  # Fields 15-25
  195, 209, 211, 214, 215, 216, 218, 221, 222, 223, 225,
  # Fields 26-49: evenly spaced, one start every two characters
  seq(227, 273, by = 2),
  # Fields 50-77
  275, 276, 277, 278, 280, 282, 284, 286, 288, 290, 292, 294,
  296, 298, 300, 303, 305, 306, 307, 315, 322, 330, 332, 335,
  337, 340, 342, 345
)

# Last character position of each of the 77 fixed-width fields, listed in
# the same order as 'var_names'.
pos_end <- c(
  # Fields 1-14
  35, 45, 55, 66, 91, 105, 133, 141, 149, 160, 180, 191,
  193, 194,
  # Fields 15-25
  208, 210, 213, 214, 215, 217, 220, 221, 222, 224, 226,
  # Fields 26-49: evenly spaced, one end every two characters
  seq(228, 274, by = 2),
  # Fields 50-77
  275, 276, 277, 279, 281, 283, 285, 287, 289, 291, 293, 295,
  297, 299, 302, 304, 305, 306, 314, 321, 329, 331, 334, 336,
  339, 341, 344, 346
)

# Define variable names and types ('c' = character, 'n' = numeric)

# Column names for the 77 fields, in file order (one per start/end pair).
var_names <- c(
  # Fields 1-12
  "EPCPIDX", "DUPERSID", "PHLDRIDX", "ESTBIDX", "EPRSIDX", "InsurPrivIDEX",
  "EPCPIDX_17", "DUPERSID_17", "PHLDRIDX_17", "ESTBIDX_17", "EPRSIDX_17",
  "JOBSIDX_17",
  # Fields 13-25
  "PANEL", "RN", "JOBSIDX", "JOBSINFR", "JOBSFILE", "PITFLG", "FYFLG",
  "CMJINS", "EMPLSTAT", "PHOLDER", "DEPNDNT", "PHLDRCHNG", "EVALCOVR",
  # Fields 26-49: STATUS1 through STATUS24
  paste0("STATUS", 1:24),
  # Fields 50-77
  "DECPHLDR", "OUTPHLDR", "NOPUFLG", "COVROUT_M18", "TYPEFLAG", "STEXCH",
  "STSHOP", "PrivateCat", "PRIVCAT", "HOSPINSX", "MSUPINSX", "DENTLINS",
  "VISIONIN", "PMEDINS", "COBRA", "PLANMETL", "COVTYPIN", "OOPELIG",
  "OOPPREM", "OOPPREMX", "OOPX12X", "OOPFLAG", "PREMLEVX", "PREMSUBZ",
  "ANNDEDCT", "HSAACCT", "UPRHMO", "NAMECHNG"
)

# Column type codes for the 77 fields, in file order, written as runs of
# identical codes ('c' = character, 'n' = numeric).
var_types <- c(
  rep("c", 12),  # fields 1-12
  rep("n", 2),   # fields 13-14
  "c",           # field 15
  rep("n", 58),  # fields 16-73
  "c",           # field 74
  rep("n", 3)    # fields 75-77
)

# Label each type code with its column name so types are matched by name.
names(var_types) <- var_names

# IMPORT ASCII (.dat) file ----------------------------------------------------

# Parse the fixed-width file at 'meps_path' into 'h208', using the column
# layout built from pos_start / pos_end / var_names and the declared types.
h208 <- read_fwf(
  file = meps_path,
  col_types = var_types,
  col_positions = fwf_positions(pos_start, pos_end, col_names = var_names)
)


# OPTIONAL: save as .Rdata file for easier loading ----------------------------
# Run this to save a permanent .Rdata file in the local working directory
#
# save(h208, file = "h208.Rdata")

# -----------------------------------------------------------------------------
# NOTES:
# 
#  1. This program has been tested on R version 3.6.0
#
#  2. This program will create a temporary data frame in R called 'h208'.
#     You must run the 'save' command to permanently save the data to a local
#     folder
# -----------------------------------------------------------------------------