Function that performs various checks to ensure the database is correctly formatted, and adjusts overlapping patient records.

checkBase(
  base,
  convertDates = FALSE,
  dateFormat = NULL,
  deleteMissing = NULL,
  deleteErrors = NULL,
  subjectID = "sID",
  facilityID = "fID",
  disDate = "Ddate",
  admDate = "Adate",
  maxIteration = 25,
  retainAuxData = TRUE,
  verbose = TRUE,
  ...
)

Arguments

base

(data.table) A patient discharge database, in the form of a data.table. The data.table should have at least the following columns: sID: patient ID (character); fID: facility ID (character); Adate: admission date (POSIXct, but character can be converted to POSIXct); Ddate: discharge date (POSIXct, but character can be converted to POSIXct).

convertDates

(boolean) whether dates should be converted to POSIXct when they are not already in that format. Default is FALSE.

dateFormat

(character) the input format of the date character string (e.g. "ymd" for dates like "2019-10-30"). See parse_date_time for more information on the format.

deleteMissing

(character) How to handle records that contain a missing value in at least one of the four mandatory variables: NULL (default): do not delete; the function stops with an error message instead. "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records.

deleteErrors

(character) How incorrect records should be deleted: "record": deletes just the incorrect record. "patient": deletes all records of each patient with one or more incorrect records. Default is NULL.

subjectID

(character) the name of the column containing the subject ID. Default is "sID".

facilityID

(character) the name of the column containing the facility ID. Default is "fID".

disDate

(character) the name of the column containing the discharge date. Default is "Ddate".

admDate

(character) the name of the column containing the admission date. Default is "Adate".

maxIteration

(integer) the maximum number of times the function will try to remove overlapping admissions. Default is 25.

retainAuxData

(boolean) allow retaining additional data provided in the database. Default is TRUE.

verbose

(boolean) print diagnostic messages. Default is TRUE.

...

other parameters passed on to internal functions

Value

The adjusted database as a data.table with a new class attribute "hospinet.base" and an attribute "report" containing information related to the quality of the database.

See also

Examples

## create a "fake and custom" data base
mydb = create_fake_subjectDB(n_subjects = 100, n_facilities = 100)
setnames(mydb, 1:4, c("myPatientId", "myHealthCareCenterID", "DateOfAdmission", "DateOfDischarge"))
mydb[,DateOfAdmission:= as.character(DateOfAdmission)]
#>      myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#>   1:        s001                 f041      2019-01-25      2019-01-30
#>   2:        s001                 f044      2019-03-03      2019-03-08
#>   3:        s002                 f015      2019-01-21      2019-02-01
#>   4:        s002                 f025      2019-03-18      2019-03-28
#>   5:        s003                 f006      2019-01-30      2019-01-31
#>  ---                                                                 
#> 242:        s098                 f047      2019-02-02      2019-02-03
#> 243:        s098                 f047      2019-03-09      2019-03-15
#> 244:        s099                 f082      2019-01-25      2019-01-26
#> 245:        s099                 f015      2019-02-24      2019-03-03
#> 246:        s100                 f043      2019-02-04      2019-02-07
mydb[,DateOfDischarge:= as.character(DateOfDischarge)]
#>      myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#>   1:        s001                 f041      2019-01-25      2019-01-30
#>   2:        s001                 f044      2019-03-03      2019-03-08
#>   3:        s002                 f015      2019-01-21      2019-02-01
#>   4:        s002                 f025      2019-03-18      2019-03-28
#>   5:        s003                 f006      2019-01-30      2019-01-31
#>  ---                                                                 
#> 242:        s098                 f047      2019-02-02      2019-02-03
#> 243:        s098                 f047      2019-03-09      2019-03-15
#> 244:        s099                 f082      2019-01-25      2019-01-26
#> 245:        s099                 f015      2019-02-24      2019-03-03
#> 246:        s100                 f043      2019-02-04      2019-02-07

head(mydb)
#>    myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#> 1:        s001                 f041      2019-01-25      2019-01-30
#> 2:        s001                 f044      2019-03-03      2019-03-08
#> 3:        s002                 f015      2019-01-21      2019-02-01
#> 4:        s002                 f025      2019-03-18      2019-03-28
#> 5:        s003                 f006      2019-01-30      2019-01-31
#> 6:        s003                 f049      2019-03-04      2019-03-05
#   myPatientId myHealthCareCenterID DateOfAdmission DateOfDischarge
#1:        s001                 f078      2019-01-26      2019-02-01
#2:        s002                 f053      2019-01-18      2019-01-21
#3:        s002                 f049      2019-02-25      2019-03-05
#4:        s002                 f033      2019-04-17      2019-04-21
#5:        s003                 f045      2019-02-02      2019-02-04
#6:        s003                 f087      2019-03-12      2019-03-19

str(mydb)
#> Classes ‘data.table’ and 'data.frame':	246 obs. of  4 variables:
#>  $ myPatientId         : chr  "s001" "s001" "s002" "s002" ...
#>  $ myHealthCareCenterID: chr  "f041" "f044" "f015" "f025" ...
#>  $ DateOfAdmission     : chr  "2019-01-25" "2019-03-03" "2019-01-21" "2019-03-18" ...
#>  $ DateOfDischarge     : chr  "2019-01-30" "2019-03-08" "2019-02-01" "2019-03-28" ...
#>  - attr(*, ".internal.selfref")=<externalptr> 
#Classes ‘data.table’ and 'data.frame':  262 obs. of  4 variables:
# $ myPatientId         : chr  "s001" "s002" "s002" "s002" ...
# $ myHealthCareCenterID: chr  "f078" "f053" "f049" "f033" ...
# $ DateOfAdmission     : chr  "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
# $ DateOfDischarge     : chr  "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
#- attr(*, ".internal.selfref")=<externalptr> 

my_checked_db = checkBase(mydb, 
     subjectID = "myPatientId", 
     facilityID = "myHealthCareCenterID", 
     disDate = "DateOfDischarge",
     admDate = "DateOfAdmission", 
     convertDates = TRUE, 
     dateFormat = "ymd")
#> Converting Adate, Ddate to Date format
#> Checking for missing values...
#> Checking for duplicated records...
#> Removed 0 duplicates
#> Done.

#Converting Adate, Ddate to Date format
#Checking for missing values...
#Checking for duplicated records...
#Removed 0 duplicates
#Done.

head(my_checked_db)
#>     sID  fID      Adate      Ddate
#> 1: s001 f041 2019-01-25 2019-01-30
#> 2: s001 f044 2019-03-03 2019-03-08
#> 3: s002 f015 2019-01-21 2019-02-01
#> 4: s002 f025 2019-03-18 2019-03-28
#> 5: s003 f006 2019-01-30 2019-01-31
#> 6: s003 f049 2019-03-04 2019-03-05
#    sID  fID      Adate      Ddate
#1: s001 f078 2019-01-26 2019-02-01
#2: s002 f053 2019-01-18 2019-01-21
#3: s002 f049 2019-02-25 2019-03-05
#4: s002 f033 2019-04-17 2019-04-21
#5: s003 f045 2019-02-02 2019-02-04
#6: s003 f087 2019-03-12 2019-03-19
str(my_checked_db)
#> Classes ‘hospinet.base’, ‘data.table’ and 'data.frame':	246 obs. of  4 variables:
#>  $ sID  : chr  "s001" "s001" "s002" "s002" ...
#>  $ fID  : chr  "f041" "f044" "f015" "f025" ...
#>  $ Adate: POSIXct, format: "2019-01-25" "2019-03-03" ...
#>  $ Ddate: POSIXct, format: "2019-01-30" "2019-03-08" ...
#>  - attr(*, ".internal.selfref")=<externalptr> 
#>  - attr(*, "sorted")= chr [1:3] "sID" "Adate" "Ddate"
#>  - attr(*, "report")=List of 18
#>   ..$ failedParse       : num 0
#>   ..$ removedMissing    : num 0
#>   ..$ missing           : num 0
#>   ..$ negativeLOS       : num 0
#>   ..$ removedNegativeLOS: num 0
#>   ..$ removedDuplicates : int 0
#>   ..$ neededIterations  : num 0
#>   ..$ allIterations     : logi FALSE
#>   ..$ addedAOS          : int 0
#>   ..$ originalSize      : int 246
#>   ..$ finalSize         : int 246
#>   ..$ LOSmean           : 'difftime' num 5.2520325203252
#>   .. ..- attr(*, "units")= chr "days"
#>   ..$ TBAmean           : 'difftime' num 30.5547945205479
#>   .. ..- attr(*, "units")= chr "days"
#>   ..$ admissions        : int 246
#>   ..$ subjects          : int 100
#>   ..$ numFacilities     : int 96
#>   ..$ LOSdistribution   : 'table' int [1:14(1d)] 25 17 31 31 32 26 31 24 15 7 ...
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ LOS: chr [1:14] "1" "2" "3" "4" ...
#>   ..$ TBAdistribution   : 'table' int [1:44(1d)] 1 1 3 1 2 2 2 2 2 2 ...
#>   .. ..- attr(*, "dimnames")=List of 1
#>   .. .. ..$ TBA: chr [1:44] "6" "8" "10" "11" ...
#Classes ‘hospinet.base’, ‘data.table’ and 'data.frame':  262 obs. of  4 variables:
#$ sID  : chr  "s001" "s002" "s002" "s002" ...
#$ fID  : chr  "f078" "f053" "f049" "f033" ...
#$ Adate: POSIXct, format: "2019-01-26" "2019-01-18" "2019-02-25" "2019-04-17" ...
#$ Ddate: POSIXct, format: "2019-02-01" "2019-01-21" "2019-03-05" "2019-04-21" ...
# ...

## Show the quality report
attr(my_checked_db, "report")
#> $failedParse
#> [1] 0
#> 
#> $removedMissing
#> [1] 0
#> 
#> $missing
#> [1] 0
#> 
#> $negativeLOS
#> [1] 0
#> 
#> $removedNegativeLOS
#> [1] 0
#> 
#> $removedDuplicates
#> [1] 0
#> 
#> $neededIterations
#> [1] 0
#> 
#> $allIterations
#> [1] FALSE
#> 
#> $addedAOS
#> [1] 0
#> 
#> $originalSize
#> [1] 246
#> 
#> $finalSize
#> [1] 246
#> 
#> $LOSmean
#> Time difference of 5.252033 days
#> 
#> $TBAmean
#> Time difference of 30.55479 days
#> 
#> $admissions
#> [1] 246
#> 
#> $subjects
#> [1] 100
#> 
#> $numFacilities
#> [1] 96
#> 
#> $LOSdistribution
#> LOS
#>  1  2  3  4  5  6  7  8  9 10 11 12 13 14 
#> 25 17 31 31 32 26 31 24 15  7  2  2  2  1 
#> 
#> $TBAdistribution
#> TBA
#>  6  8 10 11 12 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 
#>  1  1  3  1  2  2  2  2  2  2  3  3  5  3  3  5  5  6  6  5  4  6  8  8  5  4 
#> 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 51 52 60 
#>  3  4  4  4  5  3  3  1  4  3  4  1  1  4  1  2  1  1 
#>