Function that performs various checks to ensure the database is correctly formatted, and adjusts overlapping patient records.
checkBase(
base,
convertDates = FALSE,
dateFormat = NULL,
deleteMissing = NULL,
deleteErrors = NULL,
subjectID = "sID",
facilityID = "fID",
disDate = "Ddate",
admDate = "Adate",
maxIteration = 25,
retainAuxData = TRUE,
verbose = TRUE,
...
)
(data.table) A patient discharge database, in the form of a data.table. The data.table should have at least the following columns: sID: patient ID (character); fID: facility ID (character); Adate: admission date (POSIXct, but character can be converted to POSIXct); Ddate: discharge date (POSIXct, but character can be converted to POSIXct).
(boolean) Whether the dates should be converted to POSIXct if they are not already in that format.
(character) The input format of the date character string (e.g. "ymd" for dates like "2019-10-30"). See parse_date_time for more information on the format.
(character) How to handle records that contain a missing value in at least one of the four mandatory variables: NULL (default): do not delete. Stops the function with an error message. "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records.
(character) How incorrect records should be deleted: "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records.
(character) the name of the column containing the subject ID. Default is "sID".
(character) the name of the column containing the facility ID. Default is "fID".
(character) the name of the column containing the discharge date. Default is "Ddate".
(character) the name of the column containing the admission date. Default is "Adate".
(integer) the maximum number of times the function will try to remove overlapping admissions
(boolean) allow retaining additional data provided in the database. Default is TRUE.
(boolean) print diagnostic messages. Default is TRUE.
other parameters passed on to internal functions
The adjusted database as a data.table with a new class attribute "hospinet.base" and an attribute "report" containing information related to the quality of the database.
## Create a "fake and custom" database
mydb = create_fake_subjectDB(n_subjects = 100, n_facilities = 100)
setnames(mydb, 1:4, c("myPatientId", "myHealthCareCenterID", "DateOfAdmission", "DateOfDischarge"))
mydb[,DateOfAdmission:= as.character(DateOfAdmission)]
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> 1: s001 f041 2019-01-25 2019-01-30
#> 2: s001 f044 2019-03-03 2019-03-08
#> 3: s002 f015 2019-01-21 2019-02-01
#> 4: s002 f025 2019-03-18 2019-03-28
#> 5: s003 f006 2019-01-30 2019-01-31
#> ---
#> 242: s098 f047 2019-02-02 2019-02-03
#> 243: s098 f047 2019-03-09 2019-03-15
#> 244: s099 f082 2019-01-25 2019-01-26
#> 245: s099 f015 2019-02-24 2019-03-03
#> 246: s100 f043 2019-02-04 2019-02-07
mydb[,DateOfDischarge:= as.character(DateOfDischarge)]
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> 1: s001 f041 2019-01-25 2019-01-30
#> 2: s001 f044 2019-03-03 2019-03-08
#> 3: s002 f015 2019-01-21 2019-02-01
#> 4: s002 f025 2019-03-18 2019-03-28
#> 5: s003 f006 2019-01-30 2019-01-31
#> ---
#> 242: s098 f047 2019-02-02 2019-02-03
#> 243: s098 f047 2019-03-09 2019-03-15
#> 244: s099 f082 2019-01-25 2019-01-26
#> 245: s099 f015 2019-02-24 2019-03-03
#> 246: s100 f043 2019-02-04 2019-02-07
head(mydb)
#> myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> 1: s001 f041 2019-01-25 2019-01-30
#> 2: s001 f044 2019-03-03 2019-03-08
#> 3: s002 f015 2019-01-21 2019-02-01
#> 4: s002 f025 2019-03-18 2019-03-28
#> 5: s003 f006 2019-01-30 2019-01-31
#> 6: s003 f049 2019-03-04 2019-03-05
# myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#1: s001 f078 2019-01-26 2019-02-01
#2: s002 f053 2019-01-18 2019-01-21
#3: s002 f049 2019-02-25 2019-03-05
#4: s002 f033 2019-04-17 2019-04-21
#5: s003 f045 2019-02-02 2019-02-04
#6: s003 f087 2019-03-12 2019-03-19
str(mydb)
#> Classes ‘data.table’ and 'data.frame': 246 obs. of 4 variables:
#> $ myPatientId : chr "s001" "s001" "s002" "s002" ...
#> $ myHealthCareCenterID: chr "f041" "f044" "f015" "f025" ...
#> $ DateOfAdmission : chr "2019-01-25" "2019-03-03" "2019-01-21" "2019-03-18" ...
#> $ DateOfDischarge : chr "2019-01-30" "2019-03-08" "2019-02-01" "2019-03-28" ...
#> - attr(*, ".internal.selfref")=<externalptr>
#Classes ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
# $ myPatientId : chr "s001" "s002" "s002" "s002" ...
# $ myHealthCareCenterID: chr "f078" "f053" "f049" "f033" ...
# $ DateOfAdmission : chr "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
# $ DateOfDischarge : chr "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
#- attr(*, ".internal.selfref")=<externalptr>
my_checked_db = checkBase(mydb,
subjectID = "myPatientId",
facilityID = "myHealthCareCenterID",
disDate = "DateOfDischarge",
admDate = "DateOfAdmission",
convertDates = TRUE,
dateFormat = "ymd")
#> Converting Adate, Ddate to Date format
#> Checking for missing values...
#> Checking for duplicated records...
#> Removed 0 duplicates
#> Done.
#Converting Adate, Ddate to Date format
#Checking for missing values...
#Checking for duplicated records...
#Removed 0 duplicates
#Done.
head(my_checked_db)
#> sID fID Adate Ddate
#> 1: s001 f041 2019-01-25 2019-01-30
#> 2: s001 f044 2019-03-03 2019-03-08
#> 3: s002 f015 2019-01-21 2019-02-01
#> 4: s002 f025 2019-03-18 2019-03-28
#> 5: s003 f006 2019-01-30 2019-01-31
#> 6: s003 f049 2019-03-04 2019-03-05
# sID fID Adate Ddate
#1: s001 f078 2019-01-26 2019-02-01
#2: s002 f053 2019-01-18 2019-01-21
#3: s002 f049 2019-02-25 2019-03-05
#4: s002 f033 2019-04-17 2019-04-21
#5: s003 f045 2019-02-02 2019-02-04
#6: s003 f087 2019-03-12 2019-03-19
str(my_checked_db)
#> Classes ‘hospinet.base’, ‘data.table’ and 'data.frame': 246 obs. of 4 variables:
#> $ sID : chr "s001" "s001" "s002" "s002" ...
#> $ fID : chr "f041" "f044" "f015" "f025" ...
#> $ Adate: POSIXct, format: "2019-01-25" "2019-03-03" ...
#> $ Ddate: POSIXct, format: "2019-01-30" "2019-03-08" ...
#> - attr(*, ".internal.selfref")=<externalptr>
#> - attr(*, "sorted")= chr [1:3] "sID" "Adate" "Ddate"
#> - attr(*, "report")=List of 18
#> ..$ failedParse : num 0
#> ..$ removedMissing : num 0
#> ..$ missing : num 0
#> ..$ negativeLOS : num 0
#> ..$ removedNegativeLOS: num 0
#> ..$ removedDuplicates : int 0
#> ..$ neededIterations : num 0
#> ..$ allIterations : logi FALSE
#> ..$ addedAOS : int 0
#> ..$ originalSize : int 246
#> ..$ finalSize : int 246
#> ..$ LOSmean : 'difftime' num 5.2520325203252
#> .. ..- attr(*, "units")= chr "days"
#> ..$ TBAmean : 'difftime' num 30.5547945205479
#> .. ..- attr(*, "units")= chr "days"
#> ..$ admissions : int 246
#> ..$ subjects : int 100
#> ..$ numFacilities : int 96
#> ..$ LOSdistribution : 'table' int [1:14(1d)] 25 17 31 31 32 26 31 24 15 7 ...
#> .. ..- attr(*, "dimnames")=List of 1
#> .. .. ..$ LOS: chr [1:14] "1" "2" "3" "4" ...
#> ..$ TBAdistribution : 'table' int [1:44(1d)] 1 1 3 1 2 2 2 2 2 2 ...
#> .. ..- attr(*, "dimnames")=List of 1
#> .. .. ..$ TBA: chr [1:44] "6" "8" "10" "11" ...
#Classes ‘hospinet.base’, ‘data.table’ and 'data.frame': 262 obs. of 4 variables:
#$ sID : chr "s001" "s002" "s002" "s002" ...
#$ fID : chr "f078" "f053" "f049" "f033" ...
#$ Adate: POSIXct, format: "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
#$ Ddate: POSIXct, format: "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
# ...
## Show the quality report
attr(my_checked_db, "report")
#> $failedParse
#> [1] 0
#>
#> $removedMissing
#> [1] 0
#>
#> $missing
#> [1] 0
#>
#> $negativeLOS
#> [1] 0
#>
#> $removedNegativeLOS
#> [1] 0
#>
#> $removedDuplicates
#> [1] 0
#>
#> $neededIterations
#> [1] 0
#>
#> $allIterations
#> [1] FALSE
#>
#> $addedAOS
#> [1] 0
#>
#> $originalSize
#> [1] 246
#>
#> $finalSize
#> [1] 246
#>
#> $LOSmean
#> Time difference of 5.252033 days
#>
#> $TBAmean
#> Time difference of 30.55479 days
#>
#> $admissions
#> [1] 246
#>
#> $subjects
#> [1] 100
#>
#> $numFacilities
#> [1] 96
#>
#> $LOSdistribution
#> LOS
#> 1 2 3 4 5 6 7 8 9 10 11 12 13 14
#> 25 17 31 31 32 26 31 24 15 7 2 2 2 1
#>
#> $TBAdistribution
#> TBA
#> 6 8 10 11 12 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
#> 1 1 3 1 2 2 2 2 2 2 3 3 5 3 3 5 5 6 6 5 4 6 8 8 5 4
#> 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 51 52 60
#> 3 4 4 4 5 3 3 1 4 3 4 1 1 4 1 2 1 1
#>