Skip to content

Commit b1dd1e3

Browse files
committed
minor edits
1 parent eee3942 commit b1dd1e3

3 files changed

Lines changed: 40 additions & 25 deletions

File tree

scratch/notebooks/collect_perf_commits.ipynb

Lines changed: 34 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
"cells": [
33
{
44
"cell_type": "code",
5-
"execution_count": 1,
5+
"execution_count": 7,
66
"id": "13626f75",
77
"metadata": {},
88
"outputs": [
@@ -12,13 +12,6 @@
1212
"text": [
1313
"/mnt/sdd1/atharvas/formulacode/datasmith\n"
1414
]
15-
},
16-
{
17-
"name": "stderr",
18-
"output_type": "stream",
19-
"text": [
20-
"09:16:49 WARNING simple_useragent.core: Falling back to historic user agent.\n"
21-
]
2215
}
2316
],
2417
"source": [
@@ -35,7 +28,7 @@
3528
},
3629
{
3730
"cell_type": "code",
38-
"execution_count": 2,
31+
"execution_count": 11,
3932
"id": "b4179f19",
4033
"metadata": {},
4134
"outputs": [],
@@ -49,12 +42,13 @@
4942
" --limit-per-repo 2\n",
5043
" --max-attempts 3\n",
5144
" --max-steps 10\n",
52-
"\"\"\".strip().replace(\"\\n\", \" \")"
45+
"\"\"\".strip().replace(\"\\n\", \" \")\n",
46+
"cr = ContextRegistry.load_from_file(Path(\"scratch/artifacts/pipeflush/context_registry.json\"))"
5347
]
5448
},
5549
{
5650
"cell_type": "code",
57-
"execution_count": 3,
51+
"execution_count": null,
5852
"id": "6624689c",
5953
"metadata": {},
6054
"outputs": [
@@ -230,15 +224,14 @@
230224
}
231225
],
232226
"source": [
233-
"cr = ContextRegistry.load_from_file(Path(\"scratch/artifacts/pipeflush/context_registry.json\"))\n",
234227
"commit_pth = Path(\"scratch/artifacts/pipeflush/commits_perfonly.parquet\")\n",
235228
"commit_df = pd.read_parquet(commit_pth)\n",
236229
"commit_df.head()"
237230
]
238231
},
239232
{
240233
"cell_type": "code",
241-
"execution_count": 4,
234+
"execution_count": 9,
242235
"id": "79905eb5",
243236
"metadata": {},
244237
"outputs": [
@@ -295,17 +288,17 @@
295288
},
296289
{
297290
"cell_type": "code",
298-
"execution_count": 5,
291+
"execution_count": 12,
299292
"id": "567cdaa5",
300293
"metadata": {},
301294
"outputs": [
302295
{
303296
"name": "stderr",
304297
"output_type": "stream",
305298
"text": [
306-
"09:17:16 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_0/context_registry.json\n",
307-
"09:17:36 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_1/context_registry.json\n",
308-
"09:17:58 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_2/context_registry.json\n"
299+
"09:51:55 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_0/context_registry.json\n",
300+
"09:51:59 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_1/context_registry.json\n",
301+
"09:52:08 INFO datasmith.docker.context: Context registry saved to scratch/artifacts/pipeflush/chunk_2/context_registry.json\n"
309302
]
310303
}
311304
],
@@ -338,7 +331,7 @@
338331
},
339332
{
340333
"cell_type": "code",
341-
"execution_count": 6,
334+
"execution_count": 13,
342335
"id": "3fafdd1c",
343336
"metadata": {},
344337
"outputs": [
@@ -390,11 +383,31 @@
390383
},
391384
{
392385
"cell_type": "code",
393-
"execution_count": null,
386+
"execution_count": 20,
394387
"id": "f5133158",
395388
"metadata": {},
396-
"outputs": [],
397-
"source": []
389+
"outputs": [
390+
{
391+
"data": {
392+
"text/plain": [
393+
"array([47.51953125, 47.51785714, 47.52559055])"
394+
]
395+
},
396+
"execution_count": 20,
397+
"metadata": {},
398+
"output_type": "execute_result"
399+
}
400+
],
401+
"source": [
402+
"import numpy as np\n",
403+
"\n",
404+
"lens = np.array([len(d) for d in [df1, df2, df3]])\n",
405+
"ratios = np.array(ratios)\n",
406+
"\n",
407+
"l2r = lens * 15 / ratios # min\n",
408+
"l2r_hrs = l2r / 60\n",
409+
"l2r_hrs"
410+
]
398411
}
399412
],
400413
"metadata": {

scratch/scripts/update_context_registry.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,13 +9,15 @@ def update_cr_entrypoint(cr: ContextRegistry):
99
for k, v in cr.registry.items():
1010
new_v = deepcopy(v)
1111
new_v.entrypoint_data = DockerContext().entrypoint_data
12+
new_v.dockerfile_data = DockerContext().dockerfile_data
13+
new_v.env_building_data = DockerContext().env_building_data
1214
new_reg[k] = new_v
1315

1416
cr.registry = new_reg
1517
return cr
1618

1719

1820
if __name__ == "__main__":
19-
cr = ContextRegistry.load_from_file(Path("scratch/context_registry.json"))
21+
cr = ContextRegistry.load_from_file(Path("scratch/artifacts/pipeflush/tiny/context_registry.json"))
2022
new_cr = update_cr_entrypoint(cr)
21-
new_cr.save_to_file(Path("scratch/context_registry_updated.json"))
23+
new_cr.save_to_file(Path("scratch/artifacts/pipeflush/context_registry.json"))

src/datasmith/docker/Dockerfile

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ ARG COMMIT_SHA
77

88
RUN apt-get update && \
99
apt-get install -y --no-install-recommends \
10-
curl git build-essential jq cmake ninja-build && \
10+
jq cmake ninja-build && \
1111
rm -rf /var/lib/apt/lists/*
1212

1313
RUN curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest \
@@ -22,7 +22,7 @@ ENV MAMBA_ROOT_PREFIX=/opt/conda \
2222

2323
RUN micromamba install -y -p $MAMBA_ROOT_PREFIX -c conda-forge \
2424
python=3.10 \
25-
git asv pyperf mamba conda libmambapy jq && \
25+
git asv pyperf mamba conda libmambapy && \
2626
micromamba clean --all --yes
2727

2828
RUN mkdir -p /workspace /output

0 commit comments

Comments
 (0)