import base64
import io
import os
import datetime

import dash
from dash import dcc, html, Input, Output, State
from dash.dependencies import ALL
import flask
from flask import send_file, make_response
import pandas as pd

# Import the Parser from apple_health_parser
# pip install apple-health-parser
from apple_health_parser.utils.parser import Parser
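
# NOTE: this assumes the apple-health-parser API in which Parser(export_file=...)
# unpacks the export and exposes `.flags` (the record types found) plus
# `.get_flag_records(flag=...)`, whose result carries a `.records` DataFrame,
# exactly as used below.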

# -------------------------------------------------------------------
# Flask and Dash Setup
# -------------------------------------------------------------------
server = flask.Flask(__name__)
server.config["MAX_CONTENT_LENGTH"] = 512 * 1024 * 1024  # 512 MB

app = dash.Dash(__name__, server=server)
app.title = "Apple Health Parser (Group by Day)"

# Global store: {flag: DataFrame} with the raw records from the parser.
# It lives in process memory only, so it resets on restart and is not
# suitable for concurrent multi-user use.
data_by_flag = {}

# -------------------------------------------------------------------
# Layout
# -------------------------------------------------------------------
app.layout = html.Div([
    html.Div(style={"maxWidth": "900px", "margin": "auto", "backgroundColor": "#fff", "padding": "20px", "borderRadius": "8px"}, children=[
        html.H1("Apple Health Parser – Group by Day"),
        html.P("Upload your apple_health_export.zip. For each flag, rows are grouped by day (using 'start_date') and the maximum 'value' per day is kept. "
               "The first day becomes timestamp 0 and later days are counted as day offsets. The final CSV contains only [timestamp, metric]."),

        dcc.Upload(
            id="upload-zip",
            children=html.Div(["Drag & Drop or ", html.A("Select export.zip")]),
            style={
                "width": "100%", "height": "80px", "lineHeight": "80px",
                "borderWidth": "2px", "borderStyle": "dashed",
                "borderRadius": "5px", "textAlign": "center",
                "margin": "10px 0",
            },
            multiple=False
        ),

        html.Div(id="upload-status", style={"color": "red", "marginBottom": "15px"}),
        html.Div(id="flags-container", className="card-container")
    ])
])

# -------------------------------------------------------------------
# Callback: Parse ZIP and build cards for each flag with a dual slider
# -------------------------------------------------------------------
@app.callback(
    [Output("upload-status", "children"),
     Output("flags-container", "children")],
    [Input("upload-zip", "contents")],
    [State("upload-zip", "filename")],
    prevent_initial_call=True
)
def parse_zip(contents, filename):
    if not contents:
        return ("No file uploaded.", [])
    if not filename.lower().endswith(".zip"):
        return ("Please upload a .zip Apple Health export.", [])

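    # dcc.Upload supplies `contents` as a data URL ("data:<mime>;base64,<payload>"),
    # so split off the header before decoding the payload.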
    try:
        _, b64data = contents.split(",")
        decoded = base64.b64decode(b64data)
    except Exception as e:
        return (f"Error decoding file: {e}", [])

    # Write the upload to a temporary zip for the parser. The fixed filename
    # keeps things simple but assumes one upload at a time.
    temp_zip = "temp_export.zip"
    with open(temp_zip, "wb") as f:
        f.write(decoded)

    global data_by_flag
    data_by_flag.clear()

    # Parse the export using apple_health_parser, removing the temp zip
    # whether or not initialization succeeds.
    try:
        parser = Parser(export_file=temp_zip, overwrite=True)
    except Exception as e:
        return (f"Error initializing apple_health_parser: {e}", [])
    finally:
        if os.path.exists(temp_zip):
            os.remove(temp_zip)

    flags = parser.flags
    if not flags:
        return ("No flags found in this export.", [])

    cards = []
    for flg in flags:
        try:
            data_obj = parser.get_flag_records(flag=flg)
            df = data_obj.records
            if isinstance(df, pd.DataFrame):
                data_by_flag[flg] = df
            else:
                data_by_flag[flg] = pd.DataFrame(data_obj.records)
        except Exception as e:
            print(f"Warning: could not parse flag {flg}: {e}")
            continue

        df = data_by_flag.get(flg)
        # Convert 'start_date' to datetime so rows can be grouped by calendar day.
        if df is not None and not df.empty and "start_date" in df.columns:
            df["start_date"] = pd.to_datetime(df["start_date"], errors="coerce")
            # Group by the date part and take the max of 'value' per day
            grouped = df.groupby(df["start_date"].dt.date).agg({"value": "max"}).reset_index()
            row_count = len(grouped)
            preview_html = grouped.head(5).to_html(index=False)
        else:
            row_count = 0
            preview_html = "<em>Empty DataFrame</em>"

        card = html.Div([
            html.H3(flg),
            html.P(f"Unique Days: {row_count}"),
            html.Div(dcc.Markdown(preview_html, dangerously_allow_html=True), className="preview-table"),
            html.P("Select day range:"),
            dcc.RangeSlider(
                id={"type": "range-slider", "flag": flg},
                min=0,
                max=row_count - 1 if row_count > 0 else 0,
                value=[0, row_count - 1] if row_count > 0 else [0, 0],
                step=1,
                marks={0: "0", row_count - 1: str(row_count - 1)} if row_count > 0 else {},
                tooltip={"always_visible": False, "placement": "bottom"}
            ),
            html.Div(
                html.A("Download CSV", id={"type": "download-link", "flag": flg},
                       href=f"/download_csv?flag={flg}&start_index=0&end_index={row_count - 1}" if row_count > 0 else "#",
                       target="_blank"),
                className="download-link"
            )
        ], className="card")
        cards.append(card)

    msg = f"Parsed {len(data_by_flag)} flags from {filename}."
    return (msg, cards)

# -------------------------------------------------------------------
# Callback: Update Download Link Hrefs Based on Slider Value
# -------------------------------------------------------------------
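# With pattern-matching IDs, Dash passes parallel lists here: one slider value
# and one id dict per card, in the same order, so zip() pairs each flag with
# its selected range.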
@app.callback(
    Output({"type": "download-link", "flag": ALL}, "href"),
    Input({"type": "range-slider", "flag": ALL}, "value"),
    State({"type": "range-slider", "flag": ALL}, "id")
)
def update_download_links(slider_values, slider_ids):
    hrefs = []
    for value, comp_id in zip(slider_values, slider_ids):
        flag = comp_id["flag"]
        start_index = value[0]
        end_index = value[1]
        href = f"/download_csv?flag={flag}&start_index={start_index}&end_index={end_index}"
        hrefs.append(href)
    return hrefs

# -------------------------------------------------------------------
# CSV Download Route
# -------------------------------------------------------------------
@server.route("/download_csv")
def download_csv():
    """
    For a given flag and day range (based on slider indices):
      - Convert 'start_date' to datetime and group by day (the date part)
      - Take the maximum 'value' for each day in the group
      - Slice the grouped data based on the slider indices
      - Normalize timestamps: the first day of the slice becomes 0 (days offset)
      - Rename 'value' -> 'metric'
      - Output CSV with columns [timestamp, metric]
      - File name => {flag}_{YYYY-MM-DD}-{YYYY-MM-DD}.csv from the slice's min and max dates
    """
    flag = flask.request.args.get("flag")
    # Default the start index so a bare ?flag=... request still works.
    start_index = flask.request.args.get("start_index", default=0, type=int)
    end_index = flask.request.args.get("end_index", type=int)
    if not flag:
        return make_response("No flag specified.", 400)

    global data_by_flag
    if flag not in data_by_flag:
        return make_response("Flag not found in memory; re-upload needed.", 404)

    df = data_by_flag[flag]
    if df is None or df.empty:
        return make_response("No data for this flag (empty).", 404)

    if "start_date" not in df.columns or "value" not in df.columns:
        return make_response("Dataset missing 'start_date' or 'value' columns.", 400)

    df_tmp = df.copy()
    df_tmp["start_date"] = pd.to_datetime(df_tmp["start_date"], errors="coerce")
    df_tmp = df_tmp.dropna(subset=["start_date"]).sort_values("start_date")
    if df_tmp.empty:
        return make_response("No valid start_date after conversion.", 404)

    # Group by day (using the date part) and take max of 'value'
    grouped = df_tmp.groupby(df_tmp["start_date"].dt.date).agg({"value": "max"}).reset_index()
    if grouped.empty:
        return make_response("Grouping resulted in no data.", 404)

    # Slice the grouped DataFrame by the slider indices (inclusive); if no end
    # index was supplied, fall back to the last day. The .copy() avoids pandas'
    # SettingWithCopyWarning on the column assignments below.
    if end_index is None:
        end_index = len(grouped) - 1
    grouped_slice = grouped.iloc[start_index:end_index + 1].copy()
    if grouped_slice.empty:
        return make_response("No rows in the selected day range.", 404)

    # Normalize: the first day of the slice becomes 0, later days their whole-day offset
    first_day = grouped_slice["start_date"].iloc[0]
    grouped_slice["timestamp"] = (
        pd.to_datetime(grouped_slice["start_date"]) - pd.to_datetime(first_day)
    ).dt.days
    grouped_slice["metric"] = grouped_slice["value"]
    df_out = grouped_slice[["timestamp", "metric"]].dropna()
    if df_out.empty:
        return make_response("After transformation, no valid rows remain.", 404)

    # Filename from the slice's min and max date
    min_date = grouped_slice["start_date"].min()
    max_date = grouped_slice["start_date"].max()
    start_str = min_date.strftime("%Y-%m-%d")
    end_str = max_date.strftime("%Y-%m-%d")
    csv_filename = f"{flag}_{start_str}-{end_str}.csv"

    buf = io.StringIO()
    df_out.to_csv(buf, index=False)
    buf.seek(0)

    mem = io.BytesIO(buf.getvalue().encode("utf-8"))
    return send_file(mem, mimetype="text/csv", as_attachment=True, download_name=csv_filename)
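
# Example request (hypothetical flag name and range; -OJ saves under the
# server-supplied filename):
#   curl -OJ "http://127.0.0.1:8050/download_csv?flag=HeartRate&start_index=0&end_index=30"
# saves a file such as HeartRate_2024-01-01-2024-01-31.csv with columns [timestamp, metric].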

# -------------------------------------------------------------------
# Run the App
# -------------------------------------------------------------------
if __name__ == "__main__":
    app.run(debug=True, port=8050)
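
# To try it locally: run this file with Python, open http://127.0.0.1:8050 in a
# browser, and upload the export.zip produced by the iOS Health app's
# "Export All Health Data" option.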