Chapter 9 30-Day Readmittance
R
library(tidyverse)
# Load the Synthea encounters table; one row per encounter.
encounters <- read_csv("data/synthea//encounters.csv")
- Look at the DESCRIPTION column in the encounters dataset. How many encounters were there for each unique value of DESCRIPTION?
encounters %>%
  _____(_____) %>%
  _____(count = n()) %>%
  arrange(-count)
- Look at the REASONDESCRIPTION column. How many encounters were there for each unique value of REASONDESCRIPTION?
encounters %>%
  _____(_____) %>%
  _____(count = n()) %>%
  arrange(-count)
- Look at the DESCRIPTION and REASONDESCRIPTION columns together.
encounters %>%
  _____(_____) %>%
  _____(count = n()) %>%
  arrange(-count)
# Number of encounters per DESCRIPTION value, most frequent first
# (explicit group_by + summarise form).
encounters %>%
  group_by(DESCRIPTION) %>%
  summarise(count = n()) %>%
  arrange(desc(count))
# A tibble: 50 × 2
DESCRIPTION count
<chr> <int>
1 General examination of patient (procedure) 14946
2 Encounter for problem (procedure) 5196
3 Encounter for check up (procedure) 4515
4 Well child visit (procedure) 4144
5 Encounter for symptom 3929
6 Prenatal visit 2676
7 Urgent care clinic (procedure) 2373
8 Encounter for problem 2291
9 Follow-up encounter 2282
10 Patient encounter procedure 1624
# … with 40 more rows
# Same tally using count(), which names the result column `n`.
encounters %>%
  count(DESCRIPTION) %>%
  arrange(desc(n))
# A tibble: 50 × 2
DESCRIPTION n
<chr> <int>
1 General examination of patient (procedure) 14946
2 Encounter for problem (procedure) 5196
3 Encounter for check up (procedure) 4515
4 Well child visit (procedure) 4144
5 Encounter for symptom 3929
6 Prenatal visit 2676
7 Urgent care clinic (procedure) 2373
8 Encounter for problem 2291
9 Follow-up encounter 2282
10 Patient encounter procedure 1624
# … with 40 more rows
# Number of encounters per REASONDESCRIPTION value, most frequent first.
# Missing reasons are tallied as their own <NA> row.
encounters %>%
  count(REASONDESCRIPTION) %>%
  arrange(desc(n))
# A tibble: 81 × 2
REASONDESCRIPTION n
<chr> <int>
1 <NA> 39569
2 Normal pregnancy 3779
3 Hyperlipidemia 2273
4 Viral sinusitis (disorder) 1378
5 Acute viral pharyngitis (disorder) 691
6 Acute bronchitis (disorder) 690
7 Chronic congestive heart failure (disorder) 620
8 Malignant neoplasm of breast (disorder) 585
9 Child attention deficit disorder 287
10 Sinusitis (disorder) 265
# … with 71 more rows
# Number of encounters per (DESCRIPTION, REASONDESCRIPTION) pair,
# most frequent first.
encounter_description_reason <- encounters %>%
  count(DESCRIPTION, REASONDESCRIPTION) %>%
  arrange(desc(n))

encounter_description_reason
# A tibble: 137 × 3
DESCRIPTION REASONDESCRIPTION n
<chr> <chr> <int>
1 General examination of patient (procedure) <NA> 14946
2 Encounter for problem (procedure) <NA> 5138
3 Encounter for check up (procedure) <NA> 4515
4 Well child visit (procedure) <NA> 4144
5 Prenatal visit Normal pregnancy 2585
6 Urgent care clinic (procedure) <NA> 2373
7 Follow-up encounter Hyperlipidemia 2273
8 Patient encounter procedure <NA> 1624
9 Outpatient procedure <NA> 1543
10 Encounter for symptom Viral sinusitis (disorder) 1378
# … with 127 more rows
9.1 Data Filtering
We want to count and find the patients who have come to the emergency department for a heart attack and came back to the ED for a heart attack within 30 days.
- How is a “heart attack” defined in the data?
- What columns do we need to answer that question?
R
names(encounters)
[1] "Id" "START" "STOP"
[4] "PATIENT" "ORGANIZATION" "PROVIDER"
[7] "PAYER" "ENCOUNTERCLASS" "CODE"
[10] "DESCRIPTION" "BASE_ENCOUNTER_COST" "TOTAL_CLAIM_COST"
[13] "PAYER_COVERAGE" "REASONCODE" "REASONDESCRIPTION"
library(janitor)
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
# Replace the column names with janitor's cleaned versions, then show them.
encounters <- clean_names(encounters)
names(encounters)
[1] "id" "start" "stop"
[4] "patient" "organization" "provider"
[7] "payer" "encounterclass" "code"
[10] "description" "base_encounter_cost" "total_claim_cost"
[13] "payer_coverage" "reasoncode" "reasondescription"
# Preview only the columns needed for the readmittance question.
encounters %>%
  select(
    id, start, stop, patient,
    encounterclass, description, reasondescription
  )
# A tibble: 53,346 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 d0c40d10-… 2010-01-23 17:45:28 2010-01-23 18:10:28 034e9e3b-2… ambulatory
2 e88bc3a9-… 2012-01-23 17:45:28 2012-01-23 18:00:28 034e9e3b-2… wellness
3 8f104aa7-… 2001-05-01 15:02:18 2001-05-01 15:17:18 1d604da9-9… ambulatory
4 b85c339a-… 2011-07-28 15:02:18 2011-07-28 15:17:18 1d604da9-9… wellness
5 dae2b7cb-… 2010-07-27 12:58:08 2010-07-27 13:28:08 10339b10-3… wellness
6 1e0d6b0e-… 2010-05-05 00:26:23 2010-05-05 01:11:23 8d4c4326-e… outpatient
7 6aa37300-… 2011-04-30 00:26:23 2011-04-30 01:03:23 8d4c4326-e… outpatient
8 9d35ec9f-… 2011-08-10 00:26:23 2011-08-10 00:41:23 8d4c4326-e… ambulatory
9 ae7555a9-… 2011-11-17 00:26:23 2011-11-17 00:41:23 8d4c4326-e… ambulatory
10 7253a9f9-… 2012-04-24 00:26:23 2012-04-24 00:41:23 8d4c4326-e… outpatient
# … with 53,336 more rows, and 2 more variables: description <chr>,
# reasondescription <chr>
# Column subset used by the rest of the chapter; `id:patient` selects the
# contiguous columns id, start, stop, patient.
encounters_col_sub <- encounters %>%
  select(id:patient, encounterclass, description, reasondescription)
- Filter encounters by “heart attack”
R
# First attempt: match the two "heart attack" descriptions literally.
encounters_col_sub %>%
  filter(
    description == "Cardiac Arrest" |
      description == "Myocardial Infarction"
  )
# A tibble: 70 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 2500b8bd-… 2001-07-04 08:42:44 2001-07-04 10:27:44 d49f748f-9… emergency
2 83cda5b1-… 2010-10-24 05:28:51 2010-10-24 07:13:51 e05dd037-e… emergency
3 7302a9ce-… 2017-06-04 11:11:47 2017-06-04 12:56:47 0f5646bc-a… emergency
4 7db01f61-… 1989-01-17 18:12:03 1989-01-17 19:57:03 44c8b4c8-4… emergency
5 e73fe5e1-… 1973-07-19 04:25:46 1973-07-19 06:10:46 89752052-a… emergency
6 4f1264b0-… 2017-07-13 04:42:48 2017-07-13 06:27:48 8d1ba4bb-7… emergency
7 00d8f4e4-… 2007-08-23 19:03:06 2007-08-23 20:48:06 87be3f66-1… emergency
8 1785e2e2-… 2010-03-28 05:41:47 2010-03-28 07:26:47 fcd3f564-6… emergency
9 f026c67b-… 1991-02-18 01:51:56 1991-02-18 03:36:56 47392cc2-4… emergency
10 20083627-… 2017-03-21 09:16:22 2017-03-21 11:01:22 0447625b-b… emergency
# … with 60 more rows, and 2 more variables: description <chr>,
# reasondescription <chr>
# More robust and easier to change + read: keep the "heart attack" terms in
# one lower-cased vector so matching can be case-insensitive.
mi_terms <- c("Cardiac Arrest", "Myocardial Infarction") %>%
  str_to_lower()

mi_terms
[1] "cardiac arrest" "myocardial infarction"
# All encounters whose description matches one of the "heart attack" terms.
# The description is lower-cased so it compares against the lower-cased terms.
mi_encounters <- encounters_col_sub %>%
  filter(str_to_lower(description) %in% mi_terms)

mi_encounters
# A tibble: 70 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 2500b8bd-… 2001-07-04 08:42:44 2001-07-04 10:27:44 d49f748f-9… emergency
2 83cda5b1-… 2010-10-24 05:28:51 2010-10-24 07:13:51 e05dd037-e… emergency
3 7302a9ce-… 2017-06-04 11:11:47 2017-06-04 12:56:47 0f5646bc-a… emergency
4 7db01f61-… 1989-01-17 18:12:03 1989-01-17 19:57:03 44c8b4c8-4… emergency
5 e73fe5e1-… 1973-07-19 04:25:46 1973-07-19 06:10:46 89752052-a… emergency
6 4f1264b0-… 2017-07-13 04:42:48 2017-07-13 06:27:48 8d1ba4bb-7… emergency
7 00d8f4e4-… 2007-08-23 19:03:06 2007-08-23 20:48:06 87be3f66-1… emergency
8 1785e2e2-… 2010-03-28 05:41:47 2010-03-28 07:26:47 fcd3f564-6… emergency
9 f026c67b-… 1991-02-18 01:51:56 1991-02-18 03:36:56 47392cc2-4… emergency
10 20083627-… 2017-03-21 09:16:22 2017-03-21 11:01:22 0447625b-b… emergency
# … with 60 more rows, and 2 more variables: description <chr>,
# reasondescription <chr>
- Count the encounters per patient to get each patient's number of “heart attack” encounters
R
# "Heart attack" encounters per patient, highest count first.
mi_encounters %>%
  count(patient) %>%
  arrange(desc(n))
# A tibble: 66 × 2
patient n
<chr> <int>
1 668ad49e-46d7-45ec-a4b4-dab0961d180b 2
2 87f05059-de42-4630-a35b-edb53d880640 2
3 ab41f97a-1f4a-4718-a760-1e112514f282 2
4 fd1e9a6d-5b50-4a6f-bd61-c884ab89f5c6 2
5 0325261f-61eb-46f8-acc6-89d15053fecd 1
6 0447625b-b860-483c-9f30-17ed375b1493 1
7 097f7be5-fdc1-42a8-b54b-65eb48c12cf1 1
8 0e866809-bc7a-4014-850d-06228ed80226 1
9 0f5646bc-a156-4ec0-9252-5b592e3d3184 1
10 1324aa93-d950-4796-b3f7-e1ab68c6505f 1
# … with 56 more rows
- We only want patients who have multiple ED “heart attack” encounters
R
# Keep only patients with more than one "heart attack" encounter.
mi_encounters %>%
  count(patient) %>%
  arrange(desc(n)) %>%
  filter(n > 1)
# A tibble: 4 × 2
patient n
<chr> <int>
1 668ad49e-46d7-45ec-a4b4-dab0961d180b 2
2 87f05059-de42-4630-a35b-edb53d880640 2
3 ab41f97a-1f4a-4718-a760-1e112514f282 2
4 fd1e9a6d-5b50-4a6f-bd61-c884ab89f5c6 2
# Character vector of patient ids with more than one "heart attack" encounter.
pt_mi_repeat_ids <- mi_encounters %>%
  count(patient) %>%
  arrange(desc(n)) %>%
  filter(n > 1) %>%
  pull(patient)

pt_mi_repeat_ids
[1] "668ad49e-46d7-45ec-a4b4-dab0961d180b"
[2] "87f05059-de42-4630-a35b-edb53d880640"
[3] "ab41f97a-1f4a-4718-a760-1e112514f282"
[4] "fd1e9a6d-5b50-4a6f-bd61-c884ab89f5c6"
- Filter the encounters for patients of interest.
R
# Every encounter (of any kind) for the repeat-"heart attack" patients,
# ordered chronologically within each patient.
pt_encounter_mi <- encounters_col_sub %>%
  filter(patient %in% pt_mi_repeat_ids) %>%
  arrange(patient, start, stop)

pt_encounter_mi
# A tibble: 202 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 285e294c-… 1968-07-13 06:05:07 1968-07-13 06:33:07 668ad49e-4… ambulatory
2 c36d13e7-… 1989-06-09 06:05:07 1989-06-09 06:35:07 668ad49e-4… wellness
3 10d57a1d-… 1990-04-19 06:05:07 1990-04-20 06:05:07 668ad49e-4… inpatient
4 419c46c6-… 1990-06-02 06:05:07 1990-06-03 06:20:07 668ad49e-4… inpatient
5 0387caf2-… 1991-05-30 06:05:07 1991-05-31 06:05:07 668ad49e-4… inpatient
6 67787de4-… 1992-06-12 06:05:07 1992-06-12 06:35:07 668ad49e-4… wellness
7 48ba5ae5-… 1993-06-12 06:05:07 1993-06-12 06:28:07 668ad49e-4… ambulatory
8 d8f837a8-… 1993-06-15 06:05:07 1993-06-16 06:05:07 668ad49e-4… inpatient
9 ec39566d-… 1994-06-24 06:05:07 1994-06-24 07:05:07 668ad49e-4… wellness
10 e5cfefb3-… 1995-06-30 06:05:07 1995-06-30 07:05:07 668ad49e-4… wellness
# … with 192 more rows, and 2 more variables: description <chr>,
# reasondescription <chr>
- Only care about the “heart attack” events.
There’s 2 ways you might think of doing this.
7a. Filter on “emergency” and “urgentcare” encounterclass
R
# Which encounter classes occur for these patients?
pt_encounter_mi %>%
  distinct(encounterclass)
# A tibble: 6 × 1
encounterclass
<chr>
1 ambulatory
2 wellness
3 inpatient
4 outpatient
5 emergency
6 urgentcare
# Option 7a: keep emergency and urgent-care encounters, whatever their
# description. This also keeps visits that are not "heart attacks".
pt_encounter_mi %>%
  filter(encounterclass %in% c("emergency", "urgentcare"))
# A tibble: 19 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 f02c2037-… 2011-01-21 06:05:07 2011-01-21 07:50:07 668ad49e-4… emergency
2 c78320a8-… 2011-08-23 06:05:07 2011-08-23 07:05:07 668ad49e-4… emergency
3 850ebf9e-… 2011-11-18 06:05:07 2011-11-18 07:50:07 668ad49e-4… emergency
4 320b5634-… 2014-04-04 06:05:07 2014-04-04 06:20:07 668ad49e-4… urgentcare
5 dd9d580e-… 2014-10-10 06:05:07 2014-10-10 06:20:07 668ad49e-4… urgentcare
6 ca904b1c-… 2017-03-03 06:05:07 2017-03-03 06:20:07 668ad49e-4… urgentcare
7 ed591b20-… 2017-03-10 06:05:07 2017-03-10 06:20:07 668ad49e-4… urgentcare
8 4789cea7-… 2018-05-12 06:05:07 2018-05-12 07:05:07 668ad49e-4… emergency
9 518e3e52-… 2019-09-13 06:05:07 2019-09-13 07:05:07 668ad49e-4… emergency
10 231bb53a-… 1955-02-01 13:04:22 1955-02-01 14:49:22 87f05059-d… emergency
11 e850908f-… 1977-07-19 13:04:22 1977-07-19 13:19:22 87f05059-d… urgentcare
12 60590a6f-… 1983-07-05 13:04:22 1983-07-05 14:49:22 87f05059-d… emergency
13 10b38a65-… 2005-10-23 10:35:20 2005-10-23 12:20:20 ab41f97a-1… emergency
14 2296e323-… 2012-02-12 10:35:20 2012-02-12 10:50:20 ab41f97a-1… urgentcare
15 da7fc65c-… 2013-04-07 10:35:20 2013-04-07 12:20:20 ab41f97a-1… emergency
16 6268d9e4-… 1957-05-25 20:06:53 1957-05-25 21:06:53 fd1e9a6d-5… emergency
17 0564a9a0-… 1961-05-27 20:06:53 1961-05-27 21:51:53 fd1e9a6d-5… emergency
18 820a33ac-… 1965-12-11 20:06:53 1965-12-11 20:21:53 fd1e9a6d-5… urgentcare
19 a01ee256-… 1967-01-07 20:06:53 1967-01-07 21:51:53 fd1e9a6d-5… emergency
# … with 2 more variables: description <chr>, reasondescription <chr>
7b. Filter description on our “heart attack” terms.
R
mi_terms
[1] "cardiac arrest" "myocardial infarction"
# Option 7b: keep only the "heart attack" encounters of the repeat patients.
pt_enconter_mi_only <- pt_encounter_mi %>%
  filter(str_to_lower(description) %in% mi_terms)

pt_enconter_mi_only
# A tibble: 8 × 7
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 f02c2037-b… 2011-01-21 06:05:07 2011-01-21 07:50:07 668ad49e-4… emergency
2 850ebf9e-0… 2011-11-18 06:05:07 2011-11-18 07:50:07 668ad49e-4… emergency
3 231bb53a-7… 1955-02-01 13:04:22 1955-02-01 14:49:22 87f05059-d… emergency
4 60590a6f-2… 1983-07-05 13:04:22 1983-07-05 14:49:22 87f05059-d… emergency
5 10b38a65-e… 2005-10-23 10:35:20 2005-10-23 12:20:20 ab41f97a-1… emergency
6 da7fc65c-b… 2013-04-07 10:35:20 2013-04-07 12:20:20 ab41f97a-1… emergency
7 0564a9a0-b… 1961-05-27 20:06:53 1961-05-27 21:51:53 fd1e9a6d-5… emergency
8 a01ee256-3… 1967-01-07 20:06:53 1967-01-07 21:51:53 fd1e9a6d-5… emergency
# … with 2 more variables: description <chr>, reasondescription <chr>
9.2 Working with dates
R
library(lubridate)
“today” is the date the Synthea data was generated.
ymd("2020-04-28")
[1] "2020-04-28"
9.2.1 Converting to datetime objects
R
# Add datetime versions of the encounter start and stop columns.
pt_enconter_mi_only %>%
  mutate(
    stop_dt = ymd_hms(stop),
    start_dt = ymd_hms(start)
  )
# A tibble: 8 × 9
id start stop patient encounterclass
<chr> <dttm> <dttm> <chr> <chr>
1 f02c2037-b… 2011-01-21 06:05:07 2011-01-21 07:50:07 668ad49e-4… emergency
2 850ebf9e-0… 2011-11-18 06:05:07 2011-11-18 07:50:07 668ad49e-4… emergency
3 231bb53a-7… 1955-02-01 13:04:22 1955-02-01 14:49:22 87f05059-d… emergency
4 60590a6f-2… 1983-07-05 13:04:22 1983-07-05 14:49:22 87f05059-d… emergency
5 10b38a65-e… 2005-10-23 10:35:20 2005-10-23 12:20:20 ab41f97a-1… emergency
6 da7fc65c-b… 2013-04-07 10:35:20 2013-04-07 12:20:20 ab41f97a-1… emergency
7 0564a9a0-b… 1961-05-27 20:06:53 1961-05-27 21:51:53 fd1e9a6d-5… emergency
8 a01ee256-3… 1967-01-07 20:06:53 1967-01-07 21:51:53 fd1e9a6d-5… emergency
# … with 4 more variables: description <chr>, reasondescription <chr>,
# stop_dt <dttm>, start_dt <dttm>
9.2.2 Datetime calculations
- Calculate time of ED visits.
R
# Length of each ED visit: stop minus start, returned as a difftime whose
# unit R picks automatically.
pt_enconter_mi_only %>%
  mutate(
    stop_dt = ymd_hms(stop),
    start_dt = ymd_hms(start)
  ) %>%
  select(-start, -stop) %>%
  mutate(entounter_length = stop_dt - start_dt)
# A tibble: 8 × 8
id patient encounterclass description reasondescripti… stop_dt
<chr> <chr> <chr> <chr> <chr> <dttm>
1 f02c2… 668ad4… emergency Myocardial… <NA> 2011-01-21 07:50:07
2 850eb… 668ad4… emergency Cardiac Ar… <NA> 2011-11-18 07:50:07
3 231bb… 87f050… emergency Myocardial… <NA> 1955-02-01 14:49:22
4 60590… 87f050… emergency Myocardial… <NA> 1983-07-05 14:49:22
5 10b38… ab41f9… emergency Cardiac Ar… <NA> 2005-10-23 12:20:20
6 da7fc… ab41f9… emergency Cardiac Ar… <NA> 2013-04-07 12:20:20
7 0564a… fd1e9a… emergency Myocardial… <NA> 1961-05-27 21:51:53
8 a01ee… fd1e9a… emergency Cardiac Ar… <NA> 1967-01-07 21:51:53
# … with 2 more variables: start_dt <dttm>, entounter_length <drtn>
9.2.3 Lead and lag time
- Calculate time between ED visits.
Keep in mind whether you are computing a lead or a lag.
R
# Time from the end of each visit to the start of the next row's visit.
# NOTE: there is no group_by() here, so lead() takes the next row even when
# it belongs to a different patient; the last row of one patient is paired
# with the first row of the next.
pt_enconter_mi_only %>%
  mutate(
    stop_dt = ymd_hms(stop),
    start_dt = ymd_hms(start)
  ) %>%
  select(-start, -stop) %>%
  mutate(
    entounter_length = stop_dt - start_dt,
    next_encounter_start = lead(start_dt),
    next_encounter_time = next_encounter_start - stop_dt
  )
# A tibble: 8 × 10
id patient encounterclass description reasondescripti… stop_dt
<chr> <chr> <chr> <chr> <chr> <dttm>
1 f02c2… 668ad4… emergency Myocardial… <NA> 2011-01-21 07:50:07
2 850eb… 668ad4… emergency Cardiac Ar… <NA> 2011-11-18 07:50:07
3 231bb… 87f050… emergency Myocardial… <NA> 1955-02-01 14:49:22
4 60590… 87f050… emergency Myocardial… <NA> 1983-07-05 14:49:22
5 10b38… ab41f9… emergency Cardiac Ar… <NA> 2005-10-23 12:20:20
6 da7fc… ab41f9… emergency Cardiac Ar… <NA> 2013-04-07 12:20:20
7 0564a… fd1e9a… emergency Myocardial… <NA> 1961-05-27 21:51:53
8 a01ee… fd1e9a… emergency Cardiac Ar… <NA> 1967-01-07 21:51:53
# … with 4 more variables: start_dt <dttm>, entounter_length <drtn>,
# next_encounter_start <dttm>, next_encounter_time <drtn>
9.3 Grouped column mutations
We want the calculations to happen for each patient separately.
R
<- pt_enconter_mi_only %>%
pt_enconter_mi_only mutate(stop_dt = ymd_hms(stop),
start_dt = ymd_hms(start)) %>%
select(-start, -stop) %>%
mutate(entounter_length = stop_dt - start_dt)
<- pt_enconter_mi_only %>%
mi_reencounter group_by(patient) %>%
mutate(next_encounter_start = lead(start_dt),
next_encounter_time = next_encounter_start - stop_dt) %>%
ungroup()
mi_reencounter
# A tibble: 8 × 10
id patient encounterclass description reasondescripti… stop_dt
<chr> <chr> <chr> <chr> <chr> <dttm>
1 f02c2… 668ad4… emergency Myocardial… <NA> 2011-01-21 07:50:07
2 850eb… 668ad4… emergency Cardiac Ar… <NA> 2011-11-18 07:50:07
3 231bb… 87f050… emergency Myocardial… <NA> 1955-02-01 14:49:22
4 60590… 87f050… emergency Myocardial… <NA> 1983-07-05 14:49:22
5 10b38… ab41f9… emergency Cardiac Ar… <NA> 2005-10-23 12:20:20
6 da7fc… ab41f9… emergency Cardiac Ar… <NA> 2013-04-07 12:20:20
7 0564a… fd1e9a… emergency Myocardial… <NA> 1961-05-27 21:51:53
8 a01ee… fd1e9a… emergency Cardiac Ar… <NA> 1967-01-07 21:51:53
# … with 4 more variables: start_dt <dttm>, entounter_length <drtn>,
# next_encounter_start <dttm>, next_encounter_time <drtn>
9.4 Find 30-day readmittance
R
# Visits followed by another "heart attack" visit within 30 days.
# The difftime is converted to days explicitly: comparing a difftime with a
# bare number uses whatever unit R auto-selected for that column.
mi_reencounter %>%
  filter(as.numeric(next_encounter_time, units = "days") < 30)
# A tibble: 0 × 10
# … with 10 variables: id <chr>, patient <chr>, encounterclass <chr>,
# description <chr>, reasondescription <chr>, stop_dt <dttm>,
# start_dt <dttm>, entounter_length <drtn>, next_encounter_start <dttm>,
# next_encounter_time <drtn>
5 years:
R
# Visits followed by another "heart attack" visit within 5 years
# (365.25 days per year). The difftime is converted to days explicitly so
# the threshold does not depend on the column's auto-selected unit.
mi_reencounter %>%
  filter(as.numeric(next_encounter_time, units = "days") < 365.25 * 5)
# A tibble: 1 × 10
id patient encounterclass description reasondescripti… stop_dt
<chr> <chr> <chr> <chr> <chr> <dttm>
1 f02c2… 668ad4… emergency Myocardial… <NA> 2011-01-21 07:50:07
# … with 4 more variables: start_dt <dttm>, entounter_length <drtn>,
# next_encounter_start <dttm>, next_encounter_time <drtn>
10 years
R
# Visits followed by another "heart attack" visit within 10 years
# (365.25 days per year). The difftime is converted to days explicitly so
# the threshold does not depend on the column's auto-selected unit.
mi_reencounter %>%
  filter(as.numeric(next_encounter_time, units = "days") < 365.25 * 10)
# A tibble: 3 × 10
id patient encounterclass description reasondescripti… stop_dt
<chr> <chr> <chr> <chr> <chr> <dttm>
1 f02c2… 668ad4… emergency Myocardial… <NA> 2011-01-21 07:50:07
2 10b38… ab41f9… emergency Cardiac Ar… <NA> 2005-10-23 12:20:20
3 0564a… fd1e9a… emergency Myocardial… <NA> 1961-05-27 21:51:53
# … with 4 more variables: start_dt <dttm>, entounter_length <drtn>,
# next_encounter_start <dttm>, next_encounter_time <drtn>