# -----------------------------------------------------------------------------
# R programming statements for h203 data
#
# This file contains programming statements needed to import the ASCII data 
# file (.dat) into R. The R programming language has the capability to produce
# appropriate standard errors for estimates from a survey with a complex sample 
# design such as the Medical Expenditure Panel Survey (MEPS).
# 
# The input file is the ASCII data file (h203.dat) supplied in this PUF 
# release, which can be extracted from the .zip file supplied at the MEPS 
# website: https://meps.ahrq.gov/mepsweb/data_stats/download_data_files.jsp
#
# This code imports the MEPS data into R as a data frame called 'h203'.
#  
# Note that additional packages are needed to successfully run this code. To 
# install these packages, run the 'install.packages' function (shown below). 
# Once installed, the packages can be called using the 'library' function. 
# Packages only need to be installed once, but they must be called using the 
# 'library' function every time a new R session is started.
#
# Two options are available to run this code:
#
#  1. Copy and paste the code into an interactive R session. 
#
#     The user must first download the ASCII (.dat) file from the MEPS website
#     and save it to a local directory, which must be defined in the 
#     'meps_path' variable below. In this example, the local directory is  
#     called "C:/MEPS". Note that the path structure will differ on Mac and PC.  
# 
#
#  2. Call this code directly from an interactive R session. 
#
#     (a) If the ASCII (.dat) file has already been downloaded from the MEPS  
#         website and saved to a local directory, the following code can be run 
#         after re-defining the 'meps_path' variable to point to the location 
#         of the h203.dat file:
#
#           meps_path <- "C:/MEPS/h203.dat"  
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h203/h203ru.txt")
#           head(h203) # view data
#
#
#     (b) Alternatively, the ASCII (.dat) file can be downloaded directly from
#         the MEPS website. The following code can be used to download and 
#         import the h203 data into R without having to manually download, 
#         unzip, and store the file on your local computer.
#
#           url <- "https://meps.ahrq.gov/mepsweb/data_files/pufs/h203dat.zip"
#           download.file(url, temp <- tempfile())
#
#           meps_path <- unzip(temp, exdir = tempdir())
#           source("https://meps.ahrq.gov/mepsweb/data_stats/download_data/pufs/h203/h203ru.txt")
#
#           unlink(temp)  # Unlink to delete temporary file
#
#           head(h203) # view data
#
# -----------------------------------------------------------------------------


# DEFINE 'meps_path' -----------------------------------------------------------
# 'meps_path' is the location of the ASCII file (h203.dat) that is read below.
# The default is assigned only when no object named 'meps_path' is visible yet,
# so a value set by the caller before source()-ing this file is left untouched.

if (!exists("meps_path")) {
  meps_path <- "C:/MEPS/h203.dat"
}


# INSTALL PACKAGES ------------------------------------------------------------
# Uncomment and run this portion if packages are not yet installed
#
# install.packages("readr")


# LOAD PACKAGES ---------------------------------------------------------------
# Run this for every new R session

library(readr)


# DATA FILE INFO --------------------------------------------------------------

# Character positions of each column in the fixed-width file

# Position of the last character of each column, in file order.
pos_end <- c(
  14, 25, 38, 41, 51, 58, 61, 62, 63, 65,
  67, 71, 73, 77, 78, 79, 81, 83, 85, 87,
  89, 91, 93, 95, 97, 99, 101, 103, 107, 112,
  114, 116, 121, 123, 126, 128, 130, 132, 134, 136,
  138, 141, 143, 145, 147, 149, 151, 153, 155, 162,
  164, 166, 168, 170, 172, 174, 176, 178, 183, 188,
  190, 192, 198, 200, 209, 211, 217, 219, 221, 223,
  232, 234, 236, 239, 241, 243, 245, 253, 255, 264,
  266, 275, 277, 279, 281
)

# The columns sit back to back with no gaps: the first column begins at
# position 1 and every other column begins one character after the previous
# column ends.
pos_start <- c(1, pos_end[-length(pos_end)] + 1)

# Column names, listed in the same order as the positions in 'pos_start' and
# 'pos_end'

var_names <- c(
  "JOBSIDX", "JOBSIDX_17", "JOBIDX", "JOBNUM", "DUPERSID", "DUID",
  "PID", "RN", "OrigRnd", "PANEL",
  "JSTRTM", "JSTRTY", "JSTOPM", "JSTOPY",
  "RETIRJOB", "SUBTYPE", "STILLAT", "TYPECHGD", "MAIN_JOB", "DIFFWAGE",
  "StillWorkFTPT", "WhyChngPTToFT", "WhyChngFTToPT",
  "STILLWRK", "OFFTAKEI", "NOWTAKEI", "ESTBTHRU", "INSESTB", "HIDISAVW",
  "RvwTotNumEmp", "WHY_LEFT_M18", "JOBTYPE", "NUMEMPS",
  "ESTMATE1_M18", "ESTMATE2", "MORELOC", "BUSINC", "PROPRIET", "TYPEEMPL",
  "YLEFT_M18", "YNOBUSN_M18", "HRSPRWK", "HRS35WK", "TEMPJOB", "SESNLJOB",
  "SICKPAY", "PAYDRVST", "PAYVACTN", "RETIRPLN", "WKLYAMT", "EMPLINS",
  "JOBHASHI", "OFFRDINS", "DIFFPLNS", "ANYINS", "INUNION", "PROVDINS",
  "HHMEMBER_M18", "TOTLEMP_M18", "TotNumEmp", "SALARIED", "HOWPAID",
  "DAYWAGE", "HRSPRDY", "MAKEAMT", "PERUNIT_M18", "HRLYWAGE",
  "MORE10", "MORE15", "MOREMINM", "GROSSPAY", "GROSSPER", "SALRYWKS",
  "HRSALBAS", "EARNTIPS", "EARNBONS", "EARNCOMM",
  "TIPSAMT", "TIPSUNIT_M18", "BONSAMT", "BONSUNIT", "COMMAMT", "COMMUNIT",
  "INDCODEX", "OCCCODEX"
)

# Column types, one per name ('c' = character, 'n' = numeric). Only the columns
# named in the inner vector are character; every other column is numeric.

var_types <- ifelse(
  var_names %in% c("JOBSIDX", "JOBSIDX_17", "JOBIDX", "JOBNUM",
                   "DUPERSID", "DUID", "HIDISAVW"),
  "c",
  "n"
)

# Label each type with its column name
names(var_types) <- var_names

# IMPORT ASCII (.dat) file ----------------------------------------------------
# Read the fixed-width file at 'meps_path' into the data frame 'h203', using
# the start/end positions, column names, and column types defined above.

h203 <- read_fwf(
  file = meps_path,
  col_positions = fwf_positions(pos_start, pos_end, col_names = var_names),
  col_types = var_types
)


# OPTIONAL: save as .Rdata file for easier loading ----------------------------
# Run this to save a permanent .Rdata file in the local working directory
#
# save(h203, file = "h203.Rdata")

# -----------------------------------------------------------------------------
# NOTES:
# 
#  1. This program has been tested on R version 3.6.0
#
#  2. This program will create a temporary data frame in R called 'h203'.
#     You must run the 'save' command to permanently save the data to a local
#     folder
# -----------------------------------------------------------------------------