Function that performs various checks to ensure the database is correctly formatted, and adjusts overlapping patient records.
Usage
checkBase(
base,
convertDates = FALSE,
dateFormat = NULL,
deleteMissing = NULL,
deleteErrors = NULL,
subjectID = "sID",
facilityID = "fID",
disDate = "Ddate",
admDate = "Adate",
maxIteration = 25,
retainAuxData = TRUE,
verbose = TRUE,
...
)
Arguments
- base
(data.table). A patient discharge database, in the form of a data.table. The data.table should have at least the following columns: sID: patient ID (character); fID: facility ID (character); Adate: admission date (POSIXct, or a character string that can be converted to POSIXct); Ddate: discharge date (POSIXct, or a character string that can be converted to POSIXct).
- convertDates
(boolean) indicating whether the dates should be converted to POSIXct when they are not already in that format
- dateFormat
(character) giving the input format of the date character string (e.g. "ymd" for dates like "2019-10-30"). See
parse_date_time
for more information on the format.
- deleteMissing
(character) How to handle records that contain a missing value in at least one of the four mandatory variables: NULL (default): do not delete; the function stops with an error message. "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records.
- deleteErrors
(character) How incorrect records should be deleted: "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records.
- subjectID
(character) the name of the column containing the subject ID. Default is "sID"
- facilityID
(character) the name of the column containing the facility ID. Default is "fID"
- disDate
(character) the name of the column containing the discharge date. Default is "Ddate"
- admDate
(character) the name of the column containing the admission date. Default is "Adate"
- maxIteration
(integer) the maximum number of times the function will try to remove overlapping admissions
- retainAuxData
(boolean) allow retaining additional data provided in the database. Default is TRUE.
- verbose
(boolean) print diagnostic messages. Default is TRUE.
- ...
other parameters passed on to internal functions
Value
The adjusted database as a data.table with a new class attribute "hospinet.base" and an attribute "report" containing information related to the quality of the database.
Examples
## create a "fake and custom" data base
mydb = create_fake_subjectDB(n_subjects = 100, n_facilities = 100)
data.table::setnames(mydb, 1:4,
c("myPatientId", "myHealthCareCenterID", "DateOfAdmission", "DateOfDischarge"))
mydb[,DateOfAdmission:= as.character(DateOfAdmission)]
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> <char> <char> <char> <POSc>
#> 1: s001 f035 2019-01-24 2019-01-26
#> 2: s002 f040 2019-01-22 2019-01-25
#> 3: s002 f016 2019-02-15 2019-02-19
#> 4: s002 f078 2019-03-12 2019-03-17
#> 5: s002 f090 2019-04-23 2019-05-01
#> ---
#> 258: s098 f055 2019-03-13 2019-03-21
#> 259: s099 f045 2019-01-27 2019-01-29
#> 260: s099 f051 2019-02-14 2019-02-18
#> 261: s100 f062 2019-02-21 2019-02-28
#> 262: s100 f001 2019-03-30 2019-04-07
mydb[,DateOfDischarge:= as.character(DateOfDischarge)]
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> <char> <char> <char> <char>
#> 1: s001 f035 2019-01-24 2019-01-26
#> 2: s002 f040 2019-01-22 2019-01-25
#> 3: s002 f016 2019-02-15 2019-02-19
#> 4: s002 f078 2019-03-12 2019-03-17
#> 5: s002 f090 2019-04-23 2019-05-01
#> ---
#> 258: s098 f055 2019-03-13 2019-03-21
#> 259: s099 f045 2019-01-27 2019-01-29
#> 260: s099 f051 2019-02-14 2019-02-18
#> 261: s100 f062 2019-02-21 2019-02-28
#> 262: s100 f001 2019-03-30 2019-04-07
head(mydb)
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> <char> <char> <char> <char>
#> 1: s001 f035 2019-01-24 2019-01-26
#> 2: s002 f040 2019-01-22 2019-01-25
#> 3: s002 f016 2019-02-15 2019-02-19
#> 4: s002 f078 2019-03-12 2019-03-17
#> 5: s002 f090 2019-04-23 2019-05-01
#> 6: s003 f050 2019-01-11 2019-01-17
# myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#1: s001 f078 2019-01-26 2019-02-01
#2: s002 f053 2019-01-18 2019-01-21
#3: s002 f049 2019-02-25 2019-03-05
#4: s002 f033 2019-04-17 2019-04-21
#5: s003 f045 2019-02-02 2019-02-04
#6: s003 f087 2019-03-12 2019-03-19
str(mydb)
#> Classes ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
#> $ myPatientId : chr "s001" "s002" "s002" "s002" ...
#> $ myHealthCareCenterID: chr "f035" "f040" "f016" "f078" ...
#> $ DateOfAdmission : chr "2019-01-24" "2019-01-22" "2019-02-15" "2019-03-12" ...
#> $ DateOfDischarge : chr "2019-01-26" "2019-01-25" "2019-02-19" "2019-03-17" ...
#> - attr(*, ".internal.selfref")=<externalptr>
#Classes ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
# $ myPatientId : chr "s001" "s002" "s002" "s002" ...
# $ myHealthCareCenterID: chr "f078" "f053" "f049" "f033" ...
# $ DateOfAdmission : chr "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
# $ DateOfDischarge : chr "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
#- attr(*, ".internal.selfref")=<externalptr>
my_checked_db = checkBase(mydb,
subjectID = "myPatientId",
facilityID = "myHealthCareCenterID",
disDate = "DateOfDischarge",
admDate = "DateOfAdmission",
convertDates = TRUE,
dateFormat = "ymd")
#> Converting Adate, Ddate to Date format
#> Checking for missing values...
#> Checking for duplicated records...
#> Removed 0 duplicates
#> Done.
#Converting Adate, Ddate to Date format
#Checking for missing values...
#Checking for duplicated records...
#Removed 0 duplicates
#Done.
head(my_checked_db)
#> Key: <sID, Adate, Ddate>
#> sID fID Adate Ddate
#> <char> <char> <POSc> <POSc>
#> 1: s001 f035 2019-01-24 2019-01-26
#> 2: s002 f040 2019-01-22 2019-01-25
#> 3: s002 f016 2019-02-15 2019-02-19
#> 4: s002 f078 2019-03-12 2019-03-17
#> 5: s002 f090 2019-04-23 2019-05-01
#> 6: s003 f050 2019-01-11 2019-01-17
# sID fID Adate Ddate
#1: s001 f078 2019-01-26 2019-02-01
#2: s002 f053 2019-01-18 2019-01-21
#3: s002 f049 2019-02-25 2019-03-05
#4: s002 f033 2019-04-17 2019-04-21
#5: s003 f045 2019-02-02 2019-02-04
#6: s003 f087 2019-03-12 2019-03-19
str(my_checked_db)
#> Classes ‘hospinet.base’, ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
#> $ sID : chr "s001" "s002" "s002" "s002" ...
#> $ fID : chr "f035" "f040" "f016" "f078" ...
#> $ Adate: POSIXct, format: "2019-01-24" "2019-01-22" ...
#> $ Ddate: POSIXct, format: "2019-01-26" "2019-01-25" ...
#> - attr(*, ".internal.selfref")=<externalptr>
#> - attr(*, "sorted")= chr [1:3] "sID" "Adate" "Ddate"
#> - attr(*, "report")=List of 18
#> ..$ failedParse : num 0
#> ..$ removedMissing : num 0
#> ..$ missing : num 0
#> ..$ negativeLOS : num 0
#> ..$ removedNegativeLOS: num 0
#> ..$ removedDuplicates : int 0
#> ..$ neededIterations : num 0
#> ..$ allIterations : logi FALSE
#> ..$ addedAOS : int 0
#> ..$ originalSize : int 262
#> ..$ finalSize : int 262
#> ..$ LOSmean : 'difftime' num 5.16793893129771
#> .. ..- attr(*, "units")= chr "days"
#> ..$ TBAmean : 'difftime' num 29.6666666666667
#> .. ..- attr(*, "units")= chr "days"
#> ..$ admissions : int 262
#> ..$ subjects : int 100
#> ..$ numFacilities : int 88
#> ..$ LOSdistribution : 'table' int [1:12(1d)] 29 25 32 29 31 22 33 29 17 9 ...
#> .. ..- attr(*, "dimnames")=List of 1
#> .. .. ..$ LOS: chr [1:12] "1" "2" "3" "4" ...
#> ..$ TBAdistribution : 'table' int [1:44(1d)] 1 1 1 1 1 1 2 4 4 4 ...
#> .. ..- attr(*, "dimnames")=List of 1
#> .. .. ..$ TBA: chr [1:44] "5" "7" "8" "9" ...
#Classes ‘hospinet.base’, ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
#$ sID : chr "s001" "s002" "s002" "s002" ...
#$ fID : chr "f078" "f053" "f049" "f033" ...
#$ Adate: POSIXct, format: "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
#$ Ddate: POSIXct, format: "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
# ...
## Show the quality report
attr(my_checked_db, "report")
#> $failedParse
#> [1] 0
#>
#> $removedMissing
#> [1] 0
#>
#> $missing
#> [1] 0
#>
#> $negativeLOS
#> [1] 0
#>
#> $removedNegativeLOS
#> [1] 0
#>
#> $removedDuplicates
#> [1] 0
#>
#> $neededIterations
#> [1] 0
#>
#> $allIterations
#> [1] FALSE
#>
#> $addedAOS
#> [1] 0
#>
#> $originalSize
#> [1] 262
#>
#> $finalSize
#> [1] 262
#>
#> $LOSmean
#> Time difference of 5.167939 days
#>
#> $TBAmean
#> Time difference of 29.66667 days
#>
#> $admissions
#> [1] 262
#>
#> $subjects
#> [1] 100
#>
#> $numFacilities
#> [1] 88
#>
#> $LOSdistribution
#> LOS
#> 1 2 3 4 5 6 7 8 9 10 11 12
#> 29 25 32 29 31 22 33 29 17 9 2 4
#>
#> $TBAdistribution
#> TBA
#> 5 7 8 9 10 11 13 14 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
#> 1 1 1 1 1 1 2 4 4 4 4 4 4 8 1 5 4 6 3 5 8 4 8 9 3 10
#> 34 35 36 37 38 39 40 41 42 43 44 45 46 48 49 51 52 60
#> 6 3 4 4 6 5 6 3 4 2 3 2 1 3 1 1 1 1
#>