Skip to content

Commit 149fa6f

Browse files
committed
complete with health parsing
1 parent 2419c54 commit 149fa6f

4 files changed

Lines changed: 531 additions & 0 deletions

File tree

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
data/apple_health_export/

health-parser/app.py

Lines changed: 240 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,240 @@
1+
import base64
2+
import io
3+
import os
4+
import datetime
5+
6+
import dash
7+
from dash import dcc, html, Input, Output, State
8+
from dash.dependencies import ALL
9+
import flask
10+
from flask import send_file, make_response
11+
import pandas as pd
12+
13+
# Import the Parser from apple_health_parser
14+
# pip install apple-health-parser
15+
from apple_health_parser.utils.parser import Parser
16+
17+
# -------------------------------------------------------------------
18+
# Flask and Dash Setup
19+
# -------------------------------------------------------------------
20+
# The Flask instance is created explicitly so that plain HTTP routes can be
# registered on it next to the Dash application.
server = flask.Flask(__name__)
server.config.update(MAX_CONTENT_LENGTH=512 * 1024 * 1024)  # 512 MB

app = dash.Dash(__name__, server=server)
app.title = "Apple Health Parser (Group by Day)"

# Global store: { flag: DataFrame } (raw data from parser).
# NOTE(review): this is a single module-level dict, so it is shared by every
# request this process handles; a new upload replaces the previous contents.
data_by_flag = {}
28+
29+
# -------------------------------------------------------------------
30+
# Layout
31+
# -------------------------------------------------------------------
32+
# Page layout: a centred white panel holding the title, a short explanation,
# the upload drop zone, a status line and the container that receives one
# card per parsed flag.
app.layout = html.Div(
    children=[
        html.Div(
            style={
                "maxWidth": "900px",
                "margin": "auto",
                "backgroundColor": "#fff",
                "padding": "20px",
                "borderRadius": "8px",
            },
            children=[
                html.H1("Apple Health Parser – Group by Day"),
                html.P(
                    "Upload your apple_health_export.zip. For each flag, we group rows by the day (using 'start_date') and take the maximum 'value' for each day. "
                    "Then, the first day is timestamp 0 and subsequent days are the day offset. The final CSV contains only [timestamp, metric]."
                ),

                # Single-file drop zone; its `contents` drives the parse callback.
                dcc.Upload(
                    id="upload-zip",
                    children=html.Div(
                        ["Drag & Drop or ", html.A("Select export.zip")]
                    ),
                    style={
                        "width": "100%",
                        "height": "80px",
                        "lineHeight": "80px",
                        "borderWidth": "2px",
                        "borderStyle": "dashed",
                        "borderRadius": "5px",
                        "textAlign": "center",
                        "margin": "10px 0",
                    },
                    multiple=False,
                ),

                # Status / error text written by the parse callback.
                html.Div(
                    id="upload-status",
                    style={"color": "red", "marginBottom": "15px"},
                ),
                # Filled with one card per flag after a successful upload.
                html.Div(id="flags-container", className="card-container"),
            ],
        )
    ]
)
54+
55+
# -------------------------------------------------------------------
56+
# Callback: Parse ZIP and build cards for each flag with a dual slider
57+
# -------------------------------------------------------------------
58+
def _daily_max(df):
    """Return one row per calendar day with the maximum 'value', or None.

    None is returned when *df* is missing, empty, or lacks either the
    'start_date' or the 'value' column, so callers can treat all of those
    cases as "nothing to show". The input frame is not modified.
    """
    if df is None or df.empty:
        return None
    if "start_date" not in df.columns or "value" not in df.columns:
        return None
    # Unparseable dates become NaT and are dropped by groupby as missing keys.
    days = pd.to_datetime(df["start_date"], errors="coerce").dt.date
    return df.groupby(days).agg({"value": "max"}).reset_index()


def _flag_card(flg, grouped):
    """Build the preview card (table, range slider, download link) for one flag."""
    from urllib.parse import urlencode

    row_count = 0 if grouped is None else len(grouped)
    if row_count > 0:
        last_index = row_count - 1
        preview_html = grouped.head(5).to_html(index=False)
        marks = {0: "0", last_index: str(last_index)}
        # URL-encode the flag so names containing '&', '#', spaces, etc.
        # still produce a valid query string.
        query = urlencode(
            {"flag": flg, "start_index": 0, "end_index": last_index}
        )
        href = f"/download_csv?{query}"
    else:
        last_index = 0
        preview_html = "<em>Empty DataFrame</em>"
        marks = {}
        href = "#"

    return html.Div([
        html.H3(flg),
        html.P(f"Unique Days: {row_count}"),
        html.Div(
            dcc.Markdown(preview_html, dangerously_allow_html=True),
            className="preview-table",
        ),
        html.P("Select day range:"),
        dcc.RangeSlider(
            id={"type": "range-slider", "flag": flg},
            min=0,
            max=last_index,
            value=[0, last_index],
            step=1,
            marks=marks,
            tooltip={"always_visible": False, "placement": "bottom"},
        ),
        html.Div(
            html.A(
                "Download CSV",
                id={"type": "download-link", "flag": flg},
                href=href,
                target="_blank",
            ),
            className="download-link",
        ),
    ], className="card")


@app.callback(
    [Output("upload-status", "children"),
     Output("flags-container", "children")],
    [Input("upload-zip", "contents")],
    [State("upload-zip", "filename")],
    prevent_initial_call=True
)
def parse_zip(contents, filename):
    """Parse an uploaded Apple Health export and build one card per flag.

    Args:
        contents: The upload as a "<header>,<base64 payload>" string, as
            delivered by the ``dcc.Upload`` component.
        filename: Name of the uploaded file; must end in ".zip".

    Returns:
        A ``(status_message, cards)`` tuple for the "upload-status" and
        "flags-container" outputs. ``cards`` is empty on any failure.

    Side effects:
        Clears ``data_by_flag`` and refills it with the raw records of every
        flag that could be read. Writes the upload to "temp_export.zip" in
        the working directory and removes it again once the parser has been
        constructed, whether or not construction succeeded.
    """
    if not contents:
        return ("No file uploaded.", [])
    if not filename or not filename.lower().endswith(".zip"):
        return ("Please upload a .zip Apple Health export.", [])

    try:
        _, b64data = contents.split(",", 1)
        decoded = base64.b64decode(b64data)
    except (ValueError, TypeError) as e:
        # binascii.Error (bad base64) is a ValueError subclass.
        return (f"Error decoding file: {e}", [])

    data_by_flag.clear()

    # NOTE(review): the fixed file name is kept from the original code; two
    # uploads handled at the same time would write to the same path.
    temp_zip = "temp_export.zip"
    try:
        with open(temp_zip, "wb") as f:
            f.write(decoded)
        # Parse the export using apple_health_parser
        try:
            parser = Parser(export_file=temp_zip, overwrite=True)
        except Exception as e:
            return (f"Error initializing apple_health_parser: {e}", [])
    finally:
        # Always clean up, including when the parser rejects the archive.
        try:
            os.remove(temp_zip)
        except FileNotFoundError:
            pass

    flags = parser.flags
    if not flags:
        return ("No flags found in this export.", [])

    cards = []
    for flg in flags:
        # Best effort: one unreadable flag must not abort the whole upload.
        try:
            records = parser.get_flag_records(flag=flg).records
            if not isinstance(records, pd.DataFrame):
                records = pd.DataFrame(records)
        except Exception as e:
            print(f"Warning: could not parse flag {flg}: {e}")
            continue

        data_by_flag[flg] = records
        cards.append(_flag_card(flg, _daily_max(records)))

    msg = f"Parsed {len(data_by_flag)} flags from {filename}."
    return (msg, cards)
146+
147+
# -------------------------------------------------------------------
148+
# Callback: Update Download Link Hrefs Based on Slider Value
149+
# -------------------------------------------------------------------
150+
@app.callback(
    Output({"type": "download-link", "flag": ALL}, "href"),
    Input({"type": "range-slider", "flag": ALL}, "value"),
    State({"type": "range-slider", "flag": ALL}, "id")
)
def update_download_links(slider_values, slider_ids):
    """Rebuild every download link from the current range-slider positions.

    Args:
        slider_values: One ``[start_index, end_index]`` pair per slider.
        slider_ids: The matching slider ids; each is a dict with a "flag" key.

    Returns:
        A list of hrefs, one per slider and in the same order, pointing at
        ``/download_csv`` with the flag and the selected index range.
    """
    from urllib.parse import urlencode

    hrefs = []
    for value, comp_id in zip(slider_values, slider_ids):
        if not value or len(value) < 2:
            # No usable selection for this slider: keep the link inert.
            hrefs.append("#")
            continue
        # urlencode escapes the flag, so names containing '&', '#' or
        # spaces cannot corrupt the query string.
        query = urlencode({
            "flag": comp_id["flag"],
            "start_index": value[0],
            "end_index": value[1],
        })
        hrefs.append(f"/download_csv?{query}")
    return hrefs
164+
165+
# -------------------------------------------------------------------
166+
# CSV Download Route
167+
# -------------------------------------------------------------------
168+
@server.route("/download_csv")
def download_csv():
    """
    Serve a per-day CSV for one flag and an inclusive range of day indices.

    Query parameters:
      - flag: key into ``data_by_flag``
      - start_index / end_index: integer positions (inclusive) into the
        per-day grouped table, as produced by the range slider

    Processing:
      - Convert 'start_date' to datetime, group by day (using the date part)
      - Take the maximum 'value' for each day in the group
      - Slice the grouped data based on the slider indices
      - Normalize timestamps: first day of the slice becomes 0 (days offset)
      - Rename 'value' -> 'metric'
      - Output CSV with columns [timestamp, metric]
      - File name => {flag}_{YYYY-MM-DD}-{YYYY-MM-DD}.csv from the slice's
        first and last dates.

    Responses:
      - 200 with the CSV attachment on success
      - 400 when a parameter is missing/invalid or required columns are absent
      - 404 when the flag is unknown or no rows remain at some stage
    """
    flag = flask.request.args.get("flag")
    start_index = flask.request.args.get("start_index", type=int)
    end_index = flask.request.args.get("end_index", type=int)
    if not flag:
        return make_response("No flag specified.", 400)
    # `type=int` yields None for absent or non-numeric values; without this
    # check `end_index + 1` below would raise and surface as a 500.
    if start_index is None or end_index is None:
        return make_response("start_index and end_index must be integers.", 400)
    # Negative positions would be read as offsets from the end by iloc.
    if start_index < 0 or end_index < start_index:
        return make_response("Invalid day range.", 400)

    if flag not in data_by_flag:
        return make_response("Flag not found in memory; re-upload needed.", 404)

    df = data_by_flag[flag]
    if df is None or df.empty:
        return make_response("No data for this flag (empty).", 404)

    if "start_date" not in df.columns or "value" not in df.columns:
        return make_response("Dataset missing 'start_date' or 'value' columns.", 400)

    # Work on a copy of just the two needed columns so the stored frame is
    # never modified by a download.
    df_tmp = df[["start_date", "value"]].copy()
    df_tmp["start_date"] = pd.to_datetime(df_tmp["start_date"], errors="coerce")
    df_tmp = df_tmp.dropna(subset=["start_date"])
    if df_tmp.empty:
        return make_response("No valid start_date after conversion.", 404)

    # Group by day (using the date part) and take max of 'value'.
    # groupby sorts its keys, so the result is in ascending date order.
    grouped = df_tmp.groupby(df_tmp["start_date"].dt.date).agg({"value": "max"}).reset_index()
    if grouped.empty:
        return make_response("Grouping resulted in no data.", 404)

    # Slice the grouped DataFrame based on slider indices (inclusive).
    # .copy() makes the slice independent, so adding columns below is a
    # plain assignment rather than a write to a view of `grouped`.
    grouped_slice = grouped.iloc[start_index:end_index + 1].copy()
    if grouped_slice.empty:
        return make_response("No rows in the selected day range.", 404)

    # Normalize: first day becomes 0 (days offset). The keys are
    # datetime.date objects, so subtraction gives whole-day timedeltas.
    first_day = grouped_slice["start_date"].iloc[0]
    last_day = grouped_slice["start_date"].iloc[-1]
    grouped_slice["timestamp"] = [
        (day - first_day).days for day in grouped_slice["start_date"]
    ]
    grouped_slice["metric"] = grouped_slice["value"]
    df_out = grouped_slice[["timestamp", "metric"]].dropna()
    if df_out.empty:
        return make_response("After transformation, no valid rows remain.", 404)

    # Filename from the slice's first and last date (ascending order, so
    # these are also its min and max).
    start_str = first_day.strftime("%Y-%m-%d")
    end_str = last_day.strftime("%Y-%m-%d")
    csv_filename = f"{flag}_{start_str}-{end_str}.csv"

    mem = io.BytesIO(df_out.to_csv(index=False).encode("utf-8"))
    return send_file(mem, mimetype="text/csv", as_attachment=True, download_name=csv_filename)
235+
236+
# -------------------------------------------------------------------
237+
# Run the App
238+
# -------------------------------------------------------------------
239+
# Start the development server only when this file is executed directly.
if "__main__" == __name__:
    app.run(port=8050, debug=True)

health-parser/assets/styles.css

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
/* assets/styles.css */
2+
3+
body {
4+
font-family: "Roboto", sans-serif;
5+
margin: 20px;
6+
background-color: #f4f4f4;
7+
}
8+
9+
h1, h2, h3 {
10+
font-weight: 600;
11+
}
12+
13+
.card-container {
14+
display: flex;
15+
flex-wrap: wrap;
16+
gap: 1rem;
17+
margin-top: 1rem;
18+
justify-content: center;
19+
}
20+
21+
.card {
22+
background-color: #fff;
23+
border: 1px solid #ddd;
24+
border-radius: 6px;
25+
box-shadow: 0 1px 3px rgba(0,0,0,0.12);
26+
padding: 1rem;
27+
flex: 1 1 300px;
28+
max-width: 480px;
29+
}
30+
31+
.card h3 {
32+
margin-top: 0;
33+
}
34+
35+
.preview-table {
36+
overflow-x: auto;
37+
margin-bottom: 0.5rem;
38+
}
39+
40+
.preview-table table {
41+
border: 1px solid #ddd;
42+
border-collapse: collapse;
43+
width: 100%;
44+
}
45+
46+
.preview-table th,
47+
.preview-table td {
48+
border: 1px solid #ddd;
49+
padding: 0.3rem 0.6rem;
50+
text-align: left;
51+
font-size: 0.85rem;
52+
}
53+
54+
.download-link a {
55+
background-color: #007bff;
56+
color: #fff;
57+
text-decoration: none;
58+
border-radius: 4px;
59+
padding: 0.4rem 0.7rem;
60+
font-size: 0.9rem;
61+
transition: background-color 0.2s;
62+
}
63+
64+
.download-link a:hover {
65+
background-color: #0056b3;
66+
}

0 commit comments

Comments
 (0)