More code-oriented exploration of the data: a view for researchers.
First, list the available studies and patients.
import pandas as pd
from jupyterhealth_client import Code, JupyterHealthClient
# Observation code for blood glucose; used below both to query observations
# and to filter the resulting rows.
CGM = Code.BLOOD_GLUCOSE

# Disable pandas' chained-assignment check for this notebook.
pd.set_option("mode.chained_assignment", None)

# Client used for every JupyterHealth query below.
# NOTE(review): constructed without arguments, so credentials/URL presumably
# come from the environment — confirm.
jh_client = JupyterHealthClient()
# Index the organizations returned by the client by their 'id'.
org_dict = {}
for org in jh_client.list_organizations():
    org_dict[org['id']] = org

# children is not populated, so derive it from 'partOf': every organization
# that has a parent appends its own id to the parent's 'child_ids' list.
for org in org_dict.values():
    parent_id = org['partOf']
    if parent_id is None:
        continue
    org_dict[parent_id].setdefault("child_ids", []).append(org['id'])
def print_org(org, indent=''):
    """Print one organization and, recursively, its descendants.

    Parameters
    ----------
    org : mapping with 'id' and 'name' keys and an optional 'child_ids' list.
    indent : prefix printed before this organization's line.

    Children are looked up in the module-level ``org_dict`` and printed with
    a prefix as wide as the current one, padded with spaces and ending in an
    arrow marker.
    """
    print(f"{indent}[{org['id']}] {org['name']}")
    child_indent = " " * len(indent) + " ⮑"
    for org_id in org.get('child_ids', []):
        child = org_dict[org_id]
        print_org(child, indent=child_indent)
# Print the tree beneath organization 0 (that organization itself is not printed).
# NOTE(review): assumes id 0 exists and has at least one child — confirm.
top_level_ids = org_dict[0]['child_ids']
for org_id in top_level_ids:
    print_org(org_dict[org_id])
# One line per study returned by the client: id, name and owning organization.
print("All my studies:")
for study in jh_client.list_studies():
    org_name = study['organization']['name']
    print(f" - [{study['id']}] {study['name']} org:{org_name}")
# show all the patients with study data I have access to:
print("Patients with data I have access to:")
for patient in jh_client.list_patients():
    # Consents are fetched per patient and split into consented / pending studies.
    consents = jh_client.get_patient_consents(patient['id'])
    consented = consents['studies']
    pending = consents['studiesPendingConsent']

    print(f"[{patient['id']}] {patient['nameFamily']}, {patient['nameGiven']}: {patient['telecomEmail']}")
    for study in consented:
        print(f" - [{study['id']}] {study['name']}")
    for study in pending:
        print(f" - (not consented) [{study['id']}] {study['name']}")
    if not (consented or pending):
        print(" (no studies)")
Select study and patient
At this point, we can edit the code below to select the study and patient we are interested in. This is the same thing the widgets do in the dashboard.
# pick patient id, study id from above
study_id = 30012
patient_id = 40037

# Fetch this patient's blood-glucose observations for the chosen study as a
# DataFrame. The request is capped at 10,000 rows.
df = jh_client.list_observations_df(
    patient_id=patient_id,
    study_id=study_id,
    limit=10_000,
    code=CGM,
)
df.head()
import cgmquantify
# Inspect the first record to see which columns are available.
df.iloc[0]
Reduce data to relevant subset for cgm
# The thresholds used later are in mg/dL, so stop early if any row uses another unit.
assert df.blood_glucose_unit.eq('MGDL').all()

# reduce data: keep only blood-glucose rows and the two columns used below
is_cgm_row = df.resource_type == CGM.value
cgm_columns = ["blood_glucose_value", "effective_time_frame_date_time_local"]
cgm = df.loc[is_cgm_row, cgm_columns]

# ensure sorted by date
cgm = cgm.sort_values(by="effective_time_frame_date_time_local")
Plot over time
# Quick look at the raw series: glucose value against local timestamp.
cgm.plot(
    x="effective_time_frame_date_time_local",
    y=["blood_glucose_value"],
)
Compute goals, categories
from enum import Enum
class Goal(Enum):
    """Whether a goal was met or not.

    The member values are the strings shown in the legend.
    """

    # Value displayed when the goal is met.
    met = "goal"
    # Value displayed when the goal is not met.
    unmet = "over"
class Category(Enum):
    """Glucose range labels.

    The member values are the display strings stored in the ``category``
    column and used as keys of the color mapping and chart legends.
    """

    very_high = "Very High"
    high = "High"
    target_range = "Target Range"
    low = "Low"
    very_low = "Very Low"


def classify_glucose(row):
    """Classify one blood glucose reading into a ``Category`` label.

    Parameters
    ----------
    row : object exposing ``blood_glucose_value`` (a DataFrame row when used
        with ``DataFrame.apply(..., axis=1)``). Thresholds are in mg/dL.

    Returns
    -------
    str or None
        The ``Category`` value for the reading, or ``None`` when the reading
        is missing (NaN/None).

    Ranges (bounds inclusive where shown):
        < 54 very low, 54-69 low, 70-180 target, 181-250 high, > 250 very high.
    """
    value = row.blood_glucose_value
    # A missing reading fails every comparison below and would otherwise be
    # reported as "Very High"; leave it unclassified instead.
    if pd.isna(value):
        return None
    if value < 54:
        return Category.very_low.value
    if value < 70:
        return Category.low.value
    # Upper bounds are inclusive: exactly 180 is still in target, and exactly
    # 250 is still "High" rather than "Very High".
    if value <= 180:
        return Category.target_range.value
    if value <= 250:
        return Category.high.value
    return Category.very_high.value
# Label every reading with its range, one row at a time.
cgm["category"] = cgm.apply(classify_glucose, axis="columns")

# Number of readings per range.
cgm["category"].value_counts()
Compute fractions by category, goal
# Same counts expressed as a share of all readings.
cgm["category"].value_counts(normalize=True)
Test out styling based on therapeutic goal
# Display color for each range label, ordered from very high to very low.
# The insertion order is reused later as the domain order of the color scales.
category_colors = {
    category.value: hex_color
    for category, hex_color in (
        (Category.very_high, "#FF5500"),
        (Category.high, "#FFA500"),
        (Category.target_range, "#afa"),
        (Category.low, "#f44"),
        (Category.very_low, "#a00"),
    )
}
def goal_style(row):
    """Return one CSS style per cell, coloring the whole row by its category.

    The color is looked up in ``category_colors`` using ``row.category``.
    When no color is found, every cell gets ``None`` (no styling).
    """
    color = category_colors.get(row.category)
    if color:
        cell_style = f"background-color:{color}"
    else:
        cell_style = None
    return [cell_style for _ in range(len(row))]
# Style the last 50 rows: the first hide() call takes no arguments, the
# second hides the 'category' column, then each row is colored by its range.
recent_rows = cgm[-50:]
styled = recent_rows.style.hide()
styled = styled.hide(["category"], axis="columns")
styled.apply(goal_style, axis=1)
# average
cgm["blood_glucose_value"].mean()
import altair as alt
# One row per range: label plus the lower/upper glucose bound of its band,
# listed from very low to very high. Used to draw the colored background.
category_table = pd.DataFrame(
    [
        (Category.very_low.value, 0, 54),
        (Category.low.value, 54, 70),
        (Category.target_range.value, 70, 180),
        (Category.high.value, 180, 250),
        (Category.very_high.value, 250, 300),
    ],
    columns=["range", "lower", "upper"],
)
# Line chart of a windowed median of glucose over time. The window covers
# the 12 rows before and after each row, in the data's current row order.
median_line = (
    alt.Chart(cgm)
    .mark_line()
    .transform_window(
        med_glucose="median(blood_glucose_value)",
        frame=[-12, 12],
    )
)
time_axis = alt.X("effective_time_frame_date_time_local:T", title="date")
glucose_axis = alt.Y("med_glucose:Q", title="mg/dL")
# No color encoding is applied to the line itself.
chart = median_line.encode(x=time_axis, y=glucose_axis).interactive()
# Map each range label to its color, in the order given by category_colors.
color_scale = alt.Scale(
    domain=list(category_colors),
    range=list(category_colors.values()),
)

# Translucent horizontal bands, one per row of category_table. x and x2 are
# constant visual values (0 and 300) rather than data fields.
band_marks = alt.Chart(category_table).mark_rect(opacity=0.1)
color_background = band_marks.encode(
    x=alt.value(0),
    x2=alt.value(300),
    y="lower",
    y2="upper",
    color=alt.Color("range").scale(color_scale),
)

# Layer the median line with the colored bands.
chart + color_background
cgm.columns
# compute time-of-day column
import datetime

dt = cgm.effective_time_frame_date_time_local.dt
# Seconds elapsed since local midnight for every reading.
tod = dt.hour * 3600 + dt.minute * 60 + dt.second

# Project every reading onto one shared calendar day (today), so readings
# from different days line up by time of day. The midnight anchor is
# computed once: evaluating now() per row is slow and could give rows
# different base dates if the run crosses midnight.
midnight = datetime.datetime.now().replace(hour=0, minute=0, second=0, microsecond=0)
cgm["time"] = pd.Timestamp(midnight) + pd.to_timedelta(tod, unit="s")

# The time-based rolling window below needs rows ordered by "time".
cgm.sort_values("time", inplace=True)

# compute quantiles across all days
# Each quantile is taken over a trailing one-hour window of time of day and
# stored in a column named after its percentile.
rolling = cgm.rolling("1h", on="time").blood_glucose_value
for column_name, quantile in (
    ("5", .05),
    ("25", .25),
    ("50", .50),
    ("75", .75),
    ("95", .95),
):
    cgm[column_name] = rolling.quantile(quantile)
# Color encoding keyed on the category label; reused by the bar chart further down.
color = alt.Color("category:O", title="mg/dL").scale(
    domain=list(category_colors.keys()),
    range=list(category_colors.values()),
)

# plot quantile ranges over the color background
# Outer band: 5th to 95th percentile.
outer_band = alt.Chart(cgm).mark_area(opacity=0.2).encode(
    x=alt.X("time", title="time of day"),
    y="5",
    y2="95",
)
# Inner band: 25th to 75th percentile.
inner_band = alt.Chart(cgm).mark_area(opacity=0.5).encode(
    x="time",
    y="25",
    y2="75",
)
# Median line.
median_by_time = alt.Chart(cgm).mark_line().encode(
    x="time",
    y=alt.Y("50", title="mg/dL"),
)
outer_band + inner_band + median_by_time + color_background
# compute 'active %'
# the definition of this metric is not clear,
# but since it samples every 5 minutes,
# divide the number of samples by the expected number of samples
start = cgm.effective_time_frame_date_time_local.min()
end = cgm.effective_time_frame_date_time_local.max()
duration = (end - start).total_seconds()
# Fencepost: N evenly spaced samples span only N - 1 intervals, so the
# expected count over [start, end] is intervals + 1. Without the + 1 a
# gap-free recording reports more than 100%, and a single sample
# (duration == 0) divides by zero.
expected_count = duration / 300 + 1
actual_count = len(cgm)
f"{actual_count / expected_count:.1%}"
# Share of all rows falling in each range (count per label / total rows).
category_counts = cgm.category.value_counts()
fractions_by_category = category_counts / len(cgm)
fractions_by_category
Compute fractions in each range
# Pull out the fraction for each range; a range with no readings counts as 0.
very_high, high, target, low, very_low = (
    fractions_by_category.get(category.value, 0)
    for category in (
        Category.very_high,
        Category.high,
        Category.target_range,
        Category.low,
        Category.very_low,
    )
)

# Report each fraction next to the goal quoted in the printed text.
print(f"{very_high:.0%} Very High (goal: <5%)")
print(f"{high:.0%} High")
print(f"{high + very_high:.0%} High or Very High (goal: <25%)")
print(f"{target:.0%} Target (goal: >=70%)")
print(f"{low:.0%} Low")
print(f"{very_low:.0%} Very Low (goal <1%)")
print(f"{low + very_low:.0%} Low or Very Low (goal <4%)")

# Single normalized stacked bar showing the share of samples per range.
alt.Chart(cgm).mark_bar().encode(
    y=alt.Y("count()", stack="normalize", title="% of samples"),
    color=color,
)
Also run some computations with cgmquantify
# Add 'Time', 'Glucose' and 'Day' columns, copied from df (aligned on index).
# NOTE(review): presumably these are the column names cgmquantify expects — confirm.
for new_column, source_column in (
    ("Time", "effective_time_frame_date_time_local"),
    ("Glucose", "blood_glucose_value"),
):
    cgm[new_column] = df[source_column]
cgm['Day'] = cgm['Time'].dt.date

cgmquantify.plotglucosebounds(cgm)

# Look up each metric function on the cgmquantify module by name and print its result.
for metric in ("GMI", "PIR", "POR", "TIR", "TOR"):
    print(metric, getattr(cgmquantify, metric)(cgm))