From 25f2043530046ccddeb5a057eb64a2ca8665767d Mon Sep 17 00:00:00 2001 From: Sam Ritchie Date: Wed, 15 Jul 2020 12:32:39 -0600 Subject: [PATCH 1/3] custom base image --- caliban/config/__init__.py | 92 +++++++++++++++++++++++------ caliban/docker/build.py | 14 +---- tutorials/basic/.calibanconfig.json | 2 +- 3 files changed, 77 insertions(+), 31 deletions(-) diff --git a/caliban/config/__init__.py b/caliban/config/__init__.py index f85340c..08a10e9 100644 --- a/caliban/config/__init__.py +++ b/caliban/config/__init__.py @@ -20,7 +20,7 @@ import os import sys from enum import Enum -from typing import Any, Dict, List, Optional +from typing import Any, Dict, List, Optional, Tuple import schema as s @@ -58,8 +58,48 @@ def parse(label): "type": "ACCELERATOR_TYPE_UNSPECIFIED" } +DLVMS = { + "pytorch": [None, "1.0", "1.1", "1.2", "1.3", "1.4"], + "tf": [None, "1.0", "1.13", "1.14", "1.15"], + "tf2": [None, "2.0", "2.1", "2.2"], +} + # Schema for Caliban Config + +def _dlvm_config(job_mode: JobMode) -> Dict[str, str]: + """Generates a dict of custom DLVM image identifier -> the actual image ID + available from GCR. + + """ + mode = job_mode.lower() + + def with_version(s: str, version: Optional[str]) -> Tuple[str, str]: + return f"{s}-{version}" if version else s + + def image(lib: str, version: Optional[str]) -> str: + base = f"gcr.io/deeplearning-platform-release/{lib}-{mode}" + k = with_version(f"dlvm:{lib}-{mode}", version) + v = with_version(base, version.replace('.', '_') if version else None) + return (k, v) + + return dict( + [image(lib, v) for lib, versions in DLVMS.items() for v in versions]) + + +DLVM_CONFIG = { + **_dlvm_config(JobMode.CPU), + **_dlvm_config(JobMode.GPU), +} + + +def expand_image(image: str) -> str: + """Returns the DLVM image url for the job model and the comand line arg + or returns None if the key doesn't exist in the config. + """ + return DLVM_CONFIG.get(image, image) + + AptPackages = s.Or( [str], { s.Optional("gpu", default=list): [str], @@ -67,24 +107,24 @@ def parse(label): }, error=""""apt_packages" entry must be a dictionary or list, not '{}'""") +Image = s.And(str, s.Use(expand_image)) + +BaseImage = s.Or( + Image, { + s.Optional("gpu", default=None): Image, + s.Optional("cpu", default=None): Image + }, + error= + """"base_image" entry must be a string OR dict with 'cpu' and 'gpu' keys, not '{}'""" +) + CalibanConfig = s.Schema({ - s.Optional("build_time_credentials", default=False): - bool, - s.Optional("default_mode", default=JobMode.CPU): - s.Use(JobMode.parse), - s.Optional("project_id"): - s.And(str, len), - s.Optional("cloud_key"): - s.And(str, len), - s.Optional("base_image"): - str, - s.Optional("apt_packages", default=dict): - AptPackages, - - # Allow extra entries without killing the schema to allow for backwards - # compatibility. - s.Optional(str): - str, + s.Optional("build_time_credentials", default=False): bool, + s.Optional("default_mode", default=JobMode.CPU): s.Use(JobMode.parse), + s.Optional("project_id"): s.And(str, len), + s.Optional("cloud_key"): s.And(str, len), + s.Optional("base_image", default=None): BaseImage, + s.Optional("apt_packages", default=AptPackages.validate({})): AptPackages }) # Accessors @@ -170,6 +210,22 @@ def apt_packages(conf: CalibanConfig, mode: JobMode) -> List[str]: return packages +def base_image(conf: CalibanConfig, mode: JobMode) -> Optional[str]: + """Returns a custom base image, if the user has supplied one in the + calibanconfig. + + """ + image = conf.get("base_image") + if image is None: + return None + + elif isinstance(image, str): + return image + + # dictionary case. + return image[mode.lower()] + + def caliban_config(conf_path: str = CALIBAN_CONFIG) -> CalibanConfig: """Returns a dict that represents a `.calibanconfig.json` file if present, empty dictionary otherwise. diff --git a/caliban/docker/build.py b/caliban/docker/build.py index 0740853..be0911b 100644 --- a/caliban/docker/build.py +++ b/caliban/docker/build.py @@ -25,8 +25,7 @@ import subprocess from enum import Enum from pathlib import Path -from typing import (Any, Callable, Dict, List, NamedTuple, NewType, Optional, - Union) +from typing import Any, Dict, List, NamedTuple, NewType, Optional, Union from absl import logging from blessings import Terminal @@ -458,7 +457,6 @@ def _extra_dir_entries(workdir: str, user_id: int, user_group: int, def _dockerfile_template( job_mode: c.JobMode, workdir: Optional[str] = None, - base_image_fn: Optional[Callable[[c.JobMode], str]] = None, package: Optional[Union[List, u.Package]] = None, requirements_path: Optional[str] = None, conda_env_path: Optional[str] = None, @@ -486,11 +484,6 @@ def _dockerfile_template( Most functions that call _dockerfile_template pass along any kwargs that they receive. It should be enough to add kwargs here, then rely on that mechanism to pass them along, vs adding kwargs all the way down the call chain. - - Supply a custom base_image_fn (function from job_mode -> image ID) to inject - more complex Docker commands into the Caliban environments by, for example, - building your own image on top of the TF base images, then using that. - """ uid = os.getuid() gid = os.getgid() @@ -502,10 +495,7 @@ def _dockerfile_template( if workdir is None: workdir = DEFAULT_WORKDIR - if base_image_fn is None: - base_image_fn = base_image_id - - base_image = base_image_fn(job_mode) + base_image = c.base_image(caliban_config, job_mode) or base_image_id(job_mode) dockerfile = """ FROM {base_image} diff --git a/tutorials/basic/.calibanconfig.json b/tutorials/basic/.calibanconfig.json index e324ca9..0967ef4 100644 --- a/tutorials/basic/.calibanconfig.json +++ b/tutorials/basic/.calibanconfig.json @@ -1 +1 @@ -{"apt_packages": ["cake"]} +{} From 96cc987d5c2d5cb8a59a64605593dea63405eb72 Mon Sep 17 00:00:00 2001 From: Sam Ritchie Date: Wed, 15 Jul 2020 22:25:03 -0600 Subject: [PATCH 2/3] add custom base image support --- CHANGELOG.md | 63 +++++++++++++- caliban/cli.py | 1 - caliban/config/__init__.py | 43 ++++++---- caliban/platform/notebook.py | 2 +- tests/caliban/config/test_config.py | 125 +++++++++++++++++++++++++++- 5 files changed, 215 insertions(+), 19 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c8dde05..c5dbed3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,14 +1,75 @@ -# 0.2.7 +# 0.3.0 - Caliban now authenticates AI Platform job submissions using the authentication provided by `gcloud auth login`, rather than requiring a service account key. This significantly simplifies the setup required for a first time user. + - `caliban cloud` now checks if the image exists remotely before issuing a `docker push` command on the newly built image (https://github.com/google/caliban/pull/36) + - Big internal refactor to make it easier to work on code, increase test coverage, add new backends (https://github.com/google/caliban/pull/32) +- add `schema` validation for `.calibanconfig.json`. This makes it much easier + to add configuration knobs: https://github.com/google/caliban/pull/37 + +- Custom base image support (https://github.com/google/caliban/pull/39). + `.calibanconfig.json` now supports a `"base_image"` key. For the value, can + supply: + - a Docker base image of your own + - a dict of the form `{"cpu": "base_image", "gpu": "base_image"}` with both + entries optional, of course. + + Two more cool features. + + First, if you use a format string, like `"my_image-{}:latest"`, the format + block `{}` will be filled in with either `cpu` or `gpu`, depending on the mode + Caliban is using. + + Second, we now have native support for [Google's Deep Learning + VMs](https://cloud.google.com/ai-platform/deep-learning-vm/docs/introduction) + as base images. The actual VM containers [live + here](https://console.cloud.google.com/gcr/images/deeplearning-platform-release/GLOBAL). + If you provide any of the following strings, Caliban will expand them out to + the actual base image location: + +``` +dlvm:pytorch-cpu +dlvm:pytorch-cpu-1.0 +dlvm:pytorch-cpu-1.1 +dlvm:pytorch-cpu-1.2 +dlvm:pytorch-cpu-1.3 +dlvm:pytorch-cpu-1.4 +dlvm:pytorch-gpu +dlvm:pytorch-gpu-1.0 +dlvm:pytorch-gpu-1.1 +dlvm:pytorch-gpu-1.2 +dlvm:pytorch-gpu-1.3 +dlvm:pytorch-gpu-1.4 +dlvm:tf-cpu +dlvm:tf-cpu-1.0 +dlvm:tf-cpu-1.13 +dlvm:tf-cpu-1.14 +dlvm:tf-cpu-1.15 +dlvm:tf-gpu +dlvm:tf-gpu-1.0 +dlvm:tf-gpu-1.13 +dlvm:tf-gpu-1.14 +dlvm:tf-gpu-1.15 +dlvm:tf2-cpu +dlvm:tf2-cpu-2.0 +dlvm:tf2-cpu-2.1 +dlvm:tf2-cpu-2.2 +dlvm:tf2-gpu +dlvm:tf2-gpu-2.0 +dlvm:tf2-gpu-2.1 +dlvm:tf2-gpu-2.2 +``` + +Format strings work here as well! So, `"dlvm:pytorch-{}-1.4"` is a totally valid +base image. + # 0.2.6 - Prepared for a variety of base images by setting up a cloud build matrix: diff --git a/caliban/cli.py b/caliban/cli.py index 5ea0dac..443e9f0 100644 --- a/caliban/cli.py +++ b/caliban/cli.py @@ -373,7 +373,6 @@ def dry_run_arg(parser): def container_parser(parser): - executing_parser(parser) image_tag_arg(parser) diff --git a/caliban/config/__init__.py b/caliban/config/__init__.py index bc9ddda..7bb7a60 100644 --- a/caliban/config/__init__.py +++ b/caliban/config/__init__.py @@ -35,7 +35,7 @@ class JobMode(str, Enum): @staticmethod def parse(label): - return JobMode(label.upper()) + return JobMode(label.upper().strip()) DRY_RUN_FLAG = "--dry_run" @@ -58,6 +58,9 @@ def parse(label): "type": "ACCELERATOR_TYPE_UNSPECIFIED" } +# Dictionary of the DLVM "Platform" to a sequence of versions that are +# currently available as DLVMs. The full list of images is here: +# https://console.cloud.google.com/gcr/images/deeplearning-platform-release/GLOBAL/ DLVMS = { "pytorch": [None, "1.0", "1.1", "1.2", "1.3", "1.4"], "tf": [None, "1.0", "1.13", "1.14", "1.15"], @@ -74,19 +77,21 @@ def _dlvm_config(job_mode: JobMode) -> Dict[str, str]: """ mode = job_mode.lower() - def with_version(s: str, version: Optional[str]) -> Tuple[str, str]: - return f"{s}-{version}" if version else s + def with_version(s: str, version: Optional[str], sep: str) -> Tuple[str, str]: + return f"{s}{sep}{version}" if version else s def image(lib: str, version: Optional[str]) -> str: base = f"gcr.io/deeplearning-platform-release/{lib}-{mode}" - k = with_version(f"dlvm:{lib}-{mode}", version) - v = with_version(base, version.replace('.', '_') if version else None) + k = with_version(f"dlvm:{lib}-{mode}", version, "-") + v = with_version(base, version.replace('.', '-') if version else None, ".") return (k, v) return dict( [image(lib, v) for lib, versions in DLVMS.items() for v in versions]) +# This is a dictionary of some identifier like 'dlvm:pytorch-1.0' to the actual +# Docker image ID. DLVM_CONFIG = { **_dlvm_config(JobMode.CPU), **_dlvm_config(JobMode.GPU), @@ -94,8 +99,9 @@ def image(lib: str, version: Optional[str]) -> str: def expand_image(image: str) -> str: - """Returns the DLVM image url for the job model and the comand line arg - or returns None if the key doesn't exist in the config. + """If the supplied image is one of our special prefixed identifiers, returns + the expanded Docker image ID. Else, returns the input. + """ return DLVM_CONFIG.get(image, image) @@ -183,10 +189,6 @@ def extract_region(m: Dict[str, Any]) -> ct.Region: return DEFAULT_REGION -def extract_zone(m: Dict[str, Any]) -> str: - return "{}-a".format(extract_region(m)) - - def extract_cloud_key(m: Dict[str, Any]) -> Optional[str]: """Returns the Google service account key filepath specified in the args; defaults to the $GOOGLE_APPLICATION_CREDENTIALS variable. @@ -214,16 +216,27 @@ def base_image(conf: CalibanConfig, mode: JobMode) -> Optional[str]: """Returns a custom base image, if the user has supplied one in the calibanconfig. + If the custom base image has a marker for a format string, like 'pytorch-{}', + this method will fill it in with the current mode (cpu or gpu). + """ + ret = None + mode_s = mode.lower() + image = conf.get("base_image") if image is None: - return None + return ret elif isinstance(image, str): - return image + ret = image + + else: + # dictionary case. + ret = image[mode_s] - # dictionary case. - return image[mode.lower()] + # we run expand_image again in case the user has included a format {} in the + # string. + return expand_image(ret.format(mode_s)) def caliban_config(conf_path: str = CALIBAN_CONFIG) -> CalibanConfig: diff --git a/caliban/platform/notebook.py b/caliban/platform/notebook.py index a7111ba..1a42569 100644 --- a/caliban/platform/notebook.py +++ b/caliban/platform/notebook.py @@ -74,7 +74,7 @@ def run_notebook(job_mode: c.JobMode, docker_args = ["-p", "{}:{}".format(port, port)] + run_args ps.run_interactive(job_mode, - entrypoint="/opt/conda/envs/caliban/bin/python", + entrypoint="python", entrypoint_args=jupyter_args, run_args=docker_args, inject_notebook=inject_arg, diff --git a/tests/caliban/config/test_config.py b/tests/caliban/config/test_config.py index 3f94229..f60b632 100644 --- a/tests/caliban/config/test_config.py +++ b/tests/caliban/config/test_config.py @@ -1,4 +1,5 @@ #!/usr/bin/python + # # Copyright 2020 Google LLC # @@ -18,18 +19,84 @@ import os from argparse import ArgumentTypeError +import hypothesis.strategies as st +from hypothesis import given + import caliban.config as c import caliban.platform.cloud.types as ct import caliban.util.schema as us import pytest +def test_parse_job_mode(): + assert c.JobMode.parse('CpU') == c.JobMode.CPU + assert c.JobMode.parse('cpu') == c.JobMode.CPU + assert c.JobMode.parse('cpU ') == c.JobMode.CPU + + assert c.JobMode.parse(' GpU') == c.JobMode.GPU + assert c.JobMode.parse('gpu') == c.JobMode.GPU + assert c.JobMode.parse('GPU') == c.JobMode.GPU + + with pytest.raises(Exception): + c.JobMode.parse('random') + + +@given(st.text() | st.sampled_from(sorted(c.DLVM_CONFIG.keys()))) +def test_expand_image(s): + """Expanded images either exist in the DLVM_CONFIG, or are round-tripped + without getting changed. + + """ + if s in c.DLVM_CONFIG: + assert c.expand_image(s) == c.DLVM_CONFIG[s] + else: + assert c.expand_image(s) == s + + def test_gpu(): assert c.gpu(c.JobMode.GPU) assert not c.gpu(c.JobMode.CPU) assert not c.gpu("face") +def test_extract_script_args(): + # Basic cases. If there are NO script args, or if the default is present, + # they're passed back out. + assert c.extract_script_args({}) is None + assert c.extract_script_args({'script_args': None}) is None + assert c.extract_script_args({'script_args': []}) == [] + + args = ["--carrot", "stick"] + + # If a '--' is passed in at the head it's stripped off and ignored. + assert c.extract_script_args({"script_args": ["--"] + args}) == args + assert c.extract_script_args({"script_args": args}) == args + + +def test_extract_project_id(monkeypatch): + if os.environ.get('PROJECT_ID'): + monkeypatch.delenv('PROJECT_ID') + + # if NO project ID is specified on the environment OR in the supplied config, + # the system attempts to exit. + with pytest.raises(SystemExit) as wrapped_e: + c.extract_project_id({}) + + assert wrapped_e.type == SystemExit + assert wrapped_e.value.code == 1 + + # the project ID gets mirrored back if it exists in the config. + assert c.extract_project_id({'project_id': "face"}) == "face" + + monkeypatch.setenv('PROJECT_ID', "env_id") + + # If the env variable is set it's returned: + assert c.extract_project_id({'project_id': None}) == "env_id" + + # Unless the project ID's set in the config. + assert c.extract_project_id({'project_id': "face"}) == "face" + + def test_extract_region(monkeypatch): if os.environ.get('REGION'): monkeypatch.delenv('REGION') @@ -52,17 +119,65 @@ def test_extract_region(monkeypatch): assert c.extract_region({"region": "us-west1"}) == ct.US.west1 +def test_extract_cloud_key(monkeypatch): + k = 'GOOGLE_APPLICATION_CREDENTIALS' + if os.environ.get(k): + monkeypatch.delenv(k) + + # initial missing case. + assert c.extract_cloud_key({}) == None + + monkeypatch.setenv(k, "key.json") + + # env override: + assert c.extract_cloud_key({}) == "key.json" + + # conf takes precedence. + assert c.extract_cloud_key({"cloud_key": "mynewkey.json"}) == "mynewkey.json" + + +def test_base_image(): + # If NO base image is specified, None is returned. + assert c.base_image({}, c.JobMode.CPU) == None + assert c.base_image({}, c.JobMode.GPU) == None + + dlvm = c.CalibanConfig.validate({"base_image": "dlvm:pytorch-{}-1.4"}) + + # If you leave a {} format block, base_image will splice in the job mode. + assert c.base_image(dlvm, + c.JobMode.CPU) == c.DLVM_CONFIG["dlvm:pytorch-cpu-1.4"] + assert c.base_image(dlvm, + c.JobMode.GPU) == c.DLVM_CONFIG["dlvm:pytorch-gpu-1.4"] + + conf = c.CalibanConfig.validate( + {"base_image": { + "cpu": "dlvm:tf2-{}-2.1", + "gpu": "random:latest" + }}) + + # Same trick works even nested in dicts. If the image is NOT a specially + # keyed DLVM, it's untouched. + assert c.base_image(conf, c.JobMode.CPU) == c.DLVM_CONFIG["dlvm:tf2-cpu-2.1"] + assert c.base_image(conf, c.JobMode.GPU) == "random:latest" + + def test_caliban_config(tmpdir): """Tests validation of the CalibanConfig schema and the method that returns the parsed config. """ - valid = {"apt_packages": {"cpu": ["face"]}, "random": "entry"} + valid = {"apt_packages": {"cpu": ["face"]}} valid_path = tmpdir.join('valid.json') with open(valid_path, 'w') as f: json.dump(valid, f) + valid_shared = {"apt_packages": ["face"]} + valid_shared_path = tmpdir.join('valid_shared.json') + + with open(valid_shared_path, 'w') as f: + json.dump(valid_shared, f) + invalid = {"apt_packages": "face"} invalid_path = tmpdir.join('invalid.json') @@ -80,3 +195,11 @@ def test_caliban_config(tmpdir): config = c.caliban_config(valid_path) assert c.apt_packages(config, c.JobMode.GPU) == [] assert c.apt_packages(config, c.JobMode.CPU) == ["face"] + + # If the user supplies a list instead of a dict, all still works well. + valid_shared_conf = c.caliban_config(valid_shared_path) + cpu = c.apt_packages(valid_shared_conf, c.JobMode.CPU) + gpu = c.apt_packages(valid_shared_conf, c.JobMode.GPU) + + assert cpu == gpu + assert cpu == ["face"] From 4d3ed9ec0c7b0bcaad74c7429a1bcf0ee3780520 Mon Sep 17 00:00:00 2001 From: Sam Ritchie Date: Wed, 15 Jul 2020 22:26:37 -0600 Subject: [PATCH 3/3] shoutout to sagravat --- CHANGELOG.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c5dbed3..ef91135 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -14,7 +14,8 @@ - add `schema` validation for `.calibanconfig.json`. This makes it much easier to add configuration knobs: https://github.com/google/caliban/pull/37 -- Custom base image support (https://github.com/google/caliban/pull/39). +- Custom base image support (https://github.com/google/caliban/pull/39), thanks + to https://github.com/google/caliban/pull/20 from @sagravat. `.calibanconfig.json` now supports a `"base_image"` key. For the value, can supply: - a Docker base image of your own