From 57670c21792594b47aa27d2a2b6db0d0b7d35922 Mon Sep 17 00:00:00 2001 From: hafeezhmha Date: Thu, 22 Jan 2026 18:47:53 +0530 Subject: [PATCH] Add conda package support Adds support for installing conda packages during image builds using micromamba. This enables installation of packages only available through conda-forge and other conda channels. Configuration: - conda_packages: List of packages to install (supports version pinning) - conda_channels: List of channels to use (defaults to conda-forge, defaults) Implementation uses micromamba instead of full conda for smaller image size (~10MB vs ~500MB). Micromamba version pinned to 2.5.0-1 for reproducible builds. Architecture detection ensures compatibility with both amd64 (linux-64) and arm64 (linux-aarch64) platforms. Conda packages are installed to /opt/conda and symlinked to system Python's site-packages, avoiding the need to reinstall cog while making conda packages accessible to the existing Python environment. Fixes #2471 --- docs/yaml.md | 44 +++++++ integration-tests/tests/conda_packages.txtar | 24 ++++ integration-tests/tests/conda_with_pip.txtar | 28 +++++ pkg/config/config.go | 18 ++- pkg/config/config_test.go | 73 ++++++++++++ pkg/config/data/config_schema_v1.0.json | 36 ++++++ pkg/dockerfile/standard_generator.go | 72 ++++++++++- pkg/dockerfile/standard_generator_test.go | 118 +++++++++++++++++++ 8 files changed, 409 insertions(+), 4 deletions(-) create mode 100644 integration-tests/tests/conda_packages.txtar create mode 100644 integration-tests/tests/conda_with_pip.txtar diff --git a/docs/yaml.md b/docs/yaml.md index 07fbdfc7b1..773c48a3b4 100644 --- a/docs/yaml.md +++ b/docs/yaml.md @@ -22,6 +22,50 @@ This stanza describes how to build the Docker image your model runs in. It conta +### `conda_channels` + +Conda channels to search for packages. If not specified, defaults to `["conda-forge", "defaults"]`. 
+ +For example: + +```yaml +build: + conda_channels: + - conda-forge + - bioconda + - defaults + conda_packages: + - biopython +``` + +Channels are searched in the order specified. + +### `conda_packages` + +A list of packages to install via conda. This is useful for packages that are only available through conda-forge or have complex C/C++ dependencies better managed by conda. + +For example: + +```yaml +build: + conda_packages: + - pythonocc-core + - rdkit + conda_channels: + - conda-forge +``` + +You can pin exact versions or give version ranges using conda's match specification syntax: + +```yaml +build: + conda_packages: + - numpy=1.24.0 + - scipy>=1.10,<2.0 +``` + +Conda packages are installed before pip packages, so you can use both `conda_packages` and `python_requirements` together. Conda packages are installed into `/opt/conda` and symlinked into the system Python's `site-packages`; pip packages are installed afterwards. + ### `cuda` Cog automatically picks the correct version of CUDA to install, but this lets you override it for whatever reason by specifying the minor (`11.8`) or patch (`11.8.0`) version of CUDA to use. diff --git a/integration-tests/tests/conda_packages.txtar b/integration-tests/tests/conda_packages.txtar new file mode 100644 index 0000000000..43e4a55778 --- /dev/null +++ b/integration-tests/tests/conda_packages.txtar @@ -0,0 +1,24 @@ +# Test that conda packages can be installed in cog builds + +cog build -t $TEST_IMAGE +cog predict $TEST_IMAGE -i text='hello' +stdout 'numpy version: 1.24' +stdout 'conda packages work!' + +-- cog.yaml -- +build: + python_version: "3.11" + conda_packages: + - numpy=1.24.3 + conda_channels: + - conda-forge +predict: predict.py:Predictor + +-- predict.py -- +from cog import BasePredictor +import numpy as np + +class Predictor(BasePredictor): + def predict(self, text: str) -> str: + numpy_version = np.__version__ + return f"numpy version: {numpy_version}, conda packages work!
input: {text}" diff --git a/integration-tests/tests/conda_with_pip.txtar b/integration-tests/tests/conda_with_pip.txtar new file mode 100644 index 0000000000..44a9a5a53b --- /dev/null +++ b/integration-tests/tests/conda_with_pip.txtar @@ -0,0 +1,28 @@ +# Test that conda packages work together with pip packages + +cog build -t $TEST_IMAGE +cog predict $TEST_IMAGE +stdout 'numpy from conda' +stdout 'requests from pip' + +-- cog.yaml -- +build: + python_version: "3.11" + conda_packages: + - numpy=1.24.3 + conda_channels: + - conda-forge + python_requirements: requirements.txt +predict: predict.py:Predictor + +-- requirements.txt -- +requests==2.31.0 + +-- predict.py -- +from cog import BasePredictor +import numpy as np +import requests + +class Predictor(BasePredictor): + def predict(self) -> str: + return f"numpy from conda: {np.__version__}, requests from pip: {requests.__version__}" diff --git a/pkg/config/config.go b/pkg/config/config.go index 2074c1d3fd..b42869b0d9 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -24,7 +24,6 @@ var ( PipPackageNameRegex = regexp.MustCompile(`^([^>=<~ \n[#]+)`) ) -// TODO(andreas): support conda packages // TODO(andreas): support dockerfiles // TODO(andreas): custom cpu/gpu installs // TODO(andreas): suggest valid torchvision versions (e.g. 
if the user wants to use 0.8.0, suggest 0.8.1) @@ -50,6 +49,8 @@ type Build struct { PythonVersion string `json:"python_version,omitempty" yaml:"python_version"` PythonRequirements string `json:"python_requirements,omitempty" yaml:"python_requirements,omitempty"` PythonPackages []string `json:"python_packages,omitempty" yaml:"python_packages,omitempty"` // Deprecated, but included for backwards compatibility + CondaPackages []string `json:"conda_packages,omitempty" yaml:"conda_packages,omitempty"` // Conda packages to install via micromamba (e.g., conda-only packages from conda-forge) + CondaChannels []string `json:"conda_channels,omitempty" yaml:"conda_channels,omitempty"` // Conda channels for package installation (defaults to ["conda-forge", "defaults"]) Run []RunItem `json:"run,omitempty" yaml:"run,omitempty"` SystemPackages []string `json:"system_packages,omitempty" yaml:"system_packages,omitempty"` PreInstall []string `json:"pre_install,omitempty" yaml:"pre_install,omitempty"` // Deprecated, but included for backwards compatibility @@ -335,6 +336,21 @@ func (c *Config) ValidateAndComplete(projectDir string) error { c.Build.pythonRequirementsContent = c.Build.PythonPackages } + // Validate conda packages + if len(c.Build.CondaPackages) > 0 { + // If no channels are specified, default to conda-forge and defaults + if len(c.Build.CondaChannels) == 0 { + c.Build.CondaChannels = []string{"conda-forge", "defaults"} + } + + // Validate package names (basic check) + for _, pkg := range c.Build.CondaPackages { + if strings.TrimSpace(pkg) == "" { + errs = append(errs, fmt.Errorf("conda_packages contains empty package name")) + } + } + } + if c.Build.GPU { if err := c.validateAndCompleteCUDA(); err != nil { errs = append(errs, err) diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 2aeba66454..df227db130 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -763,3 +763,76 @@ func TestContainsCoglet(t *testing.T) { require.NoError(t, err)
require.True(t, config.ContainsCoglet()) } + +func TestCondaPackagesBasic(t *testing.T) { + config := &Config{ + Build: &Build{ + PythonVersion: "3.11", + CondaPackages: []string{"pythonocc-core", "numpy=1.24"}, + CondaChannels: []string{"conda-forge"}, + }, + } + + projectDir, err := os.MkdirTemp("", "test") + require.NoError(t, err) + defer os.RemoveAll(projectDir) + + err = config.ValidateAndComplete(projectDir) + require.NoError(t, err) + require.Equal(t, []string{"conda-forge"}, config.Build.CondaChannels) +} + +func TestCondaPackagesDefaultChannels(t *testing.T) { + config := &Config{ + Build: &Build{ + PythonVersion: "3.11", + CondaPackages: []string{"pythonocc-core"}, + // No channels specified + }, + } + + projectDir, err := os.MkdirTemp("", "test") + require.NoError(t, err) + defer os.RemoveAll(projectDir) + + err = config.ValidateAndComplete(projectDir) + require.NoError(t, err) + // Should get default channels + require.Equal(t, []string{"conda-forge", "defaults"}, config.Build.CondaChannels) +} + +func TestCondaAndPipTogether(t *testing.T) { + // Test that conda and pip can coexist + config := &Config{ + Build: &Build{ + PythonVersion: "3.11", + CondaPackages: []string{"pythonocc-core"}, + PythonPackages: []string{"torch==2.0.0"}, + }, + } + + projectDir, err := os.MkdirTemp("", "test") + require.NoError(t, err) + defer os.RemoveAll(projectDir) + + err = config.ValidateAndComplete(projectDir) + require.NoError(t, err) + // Should NOT error - both are allowed +} + +func TestCondaPackagesEmpty(t *testing.T) { + config := &Config{ + Build: &Build{ + PythonVersion: "3.11", + CondaPackages: []string{"", "numpy"}, + }, + } + + projectDir, err := os.MkdirTemp("", "test") + require.NoError(t, err) + defer os.RemoveAll(projectDir) + + err = config.ValidateAndComplete(projectDir) + require.Error(t, err) + require.Contains(t, err.Error(), "empty package name") +} diff --git a/pkg/config/data/config_schema_v1.0.json b/pkg/config/data/config_schema_v1.0.json index 
0d429e47c2..66f61b6c6b 100644 --- a/pkg/config/data/config_schema_v1.0.json +++ b/pkg/config/data/config_schema_v1.0.json @@ -73,6 +73,42 @@ "type": "string", "description": "A pip requirements file specifying the Python packages to install." }, + "conda_packages": { + "$id": "#/properties/build/properties/conda_packages", + "type": [ + "array", + "null" + ], + "description": "A list of packages to install via conda (e.g., conda-only packages from conda-forge).", + "additionalItems": true, + "items": { + "$id": "#/properties/build/properties/conda_packages/items", + "anyOf": [ + { + "$id": "#/properties/build/properties/conda_packages/items/anyOf/0", + "type": "string" + } + ] + } + }, + "conda_channels": { + "$id": "#/properties/build/properties/conda_channels", + "type": [ + "array", + "null" + ], + "description": "Conda channels to use for package installation. Defaults to ['conda-forge', 'defaults'] if not specified.", + "additionalItems": true, + "items": { + "$id": "#/properties/build/properties/conda_channels/items", + "anyOf": [ + { + "$id": "#/properties/build/properties/conda_channels/items/anyOf/0", + "type": "string" + } + ] + } + }, "system_packages": { "$id": "#/properties/build/properties/system_packages", "type": [ diff --git a/pkg/dockerfile/standard_generator.go b/pkg/dockerfile/standard_generator.go index a784f998ec..38d0e9cd53 100644 --- a/pkg/dockerfile/standard_generator.go +++ b/pkg/dockerfile/standard_generator.go @@ -93,7 +93,7 @@ func NewStandardGenerator(config *config.Config, dir string, command command.Com Config: config, Dir: dir, GOOS: runtime.GOOS, - GOARCH: runtime.GOOS, + GOARCH: runtime.GOARCH, tmpDir: tmpDir, relativeTmpDir: relativeTmpDir, fileWalker: filepath.Walk, @@ -158,6 +158,10 @@ func (g *StandardGenerator) GenerateInitialSteps(ctx context.Context) (string, e if err != nil { return "", err } + condaInstalls, err := g.condaInstalls() + if err != nil { + return "", err + } pipInstalls, err := g.pipInstalls() if err != nil { 
return "", err @@ -182,6 +186,10 @@ func (g *StandardGenerator) GenerateInitialSteps(ctx context.Context) (string, e if installCog != "" { steps = append(steps, installCog) } + // Install conda packages before pip packages + if condaInstalls != "" { + steps = append(steps, condaInstalls) + } steps = append(steps, pipInstalls) if g.precompile { steps = append(steps, PrecompilePythonCommand) @@ -200,9 +208,12 @@ func (g *StandardGenerator) GenerateInitialSteps(ctx context.Context) (string, e envs, aptInstalls, installPython, - pipInstalls, - installCog, } + // Install conda packages before pip packages + if condaInstalls != "" { + steps = append(steps, condaInstalls) + } + steps = append(steps, pipInstalls, installCog) if g.precompile { steps = append(steps, PrecompilePythonCommand) } @@ -677,6 +688,61 @@ func (g *StandardGenerator) pipInstalls() (string, error) { }, "\n"), nil } +// condaInstalls generates Dockerfile lines to install conda packages using micromamba. +// Returns empty string if no conda packages are specified. +// Micromamba is used instead of conda/miniconda for smaller image size (~10MB vs ~500MB). 
+func (g *StandardGenerator) condaInstalls() (string, error) { + if len(g.Config.Build.CondaPackages) == 0 { + return "", nil + } + + lines := []string{} + + // Install bzip2 (required for extracting micromamba) and micromamba + // We use micromamba instead of full conda/miniconda for: + // - Smaller image size (micromamba is ~10MB vs conda ~500MB) + // - Faster installation + // - Drop-in replacement for conda commands + lines = append(lines, "RUN --mount=type=cache,target=/var/cache/apt,sharing=locked apt-get update -qq && apt-get install -qqy --no-install-recommends bzip2 && rm -rf /var/lib/apt/lists/*") + + // Detect architecture within the Docker build context and download micromamba + // Pinned to version 2.5.0-1 for reproducible builds + // Downloads directly from GitHub releases for version pinning support + lines = append(lines, strings.Join([]string{ + "RUN ARCH=$([ \"$(uname -m)\" = \"x86_64\" ] && echo \"linux-64\" || echo \"linux-aarch64\")", + "&& curl -Ls https://github.com/mamba-org/micromamba-releases/releases/download/2.5.0-1/micromamba-${ARCH}", + "--output /usr/local/bin/micromamba", + "&& chmod +x /usr/local/bin/micromamba", + }, " ")) + + // Configure channels + channelArgs := []string{} + for _, channel := range g.Config.Build.CondaChannels { + channelArgs = append(channelArgs, "-c "+channel) + } + + // Install conda packages to /opt/conda and symlink to system Python + // This avoids needing to reinstall cog while allowing conda packages to work + packages := strings.Join(g.Config.Build.CondaPackages, " ") + pythonVersion := g.Config.Build.PythonVersion + + lines = append(lines, fmt.Sprintf( + "RUN --mount=type=cache,target=/root/.mamba/pkgs micromamba create -y -p /opt/conda python=%s %s %s && micromamba clean -a -y", + pythonVersion, + strings.Join(channelArgs, " "), + packages, + )) + + // Symlink conda packages to system Python's site-packages + lines = append(lines, fmt.Sprintf( + "RUN for pkg in /opt/conda/lib/python%s/site-packages/*; 
do [ -e \"$pkg\" ] && ln -sf \"$pkg\" \"/usr/local/lib/python%s/site-packages/$(basename \"$pkg\")\" || true; done", + pythonVersion, + pythonVersion, + )) + + return strings.Join(lines, "\n"), nil +} + func (g *StandardGenerator) runCommands() (string, error) { runCommands := g.Config.Build.Run diff --git a/pkg/dockerfile/standard_generator_test.go b/pkg/dockerfile/standard_generator_test.go index 89fb2f6cf3..19d84d149a 100644 --- a/pkg/dockerfile/standard_generator_test.go +++ b/pkg/dockerfile/standard_generator_test.go @@ -1170,3 +1170,121 @@ predict: predict.py:Predictor require.Contains(t, actual, "ENV R8_COG_VERSION=coglet") require.Contains(t, actual, "ENV R8_PYTHON_VERSION=3.11") } + +func TestCondaInstalls(t *testing.T) { + tmpDir := t.TempDir() + + conf := &config.Config{ + Build: &config.Build{ + PythonVersion: "3.11", + CondaPackages: []string{"pythonocc-core", "numpy=1.24"}, + CondaChannels: []string{"conda-forge"}, + }, + } + + command := dockertest.NewMockCommand() + client := registrytest.NewMockRegistryClient() + gen, err := NewStandardGenerator(conf, tmpDir, command, client, true) + require.NoError(t, err) + + output, err := gen.condaInstalls() + require.NoError(t, err) + require.Contains(t, output, "micromamba create") + require.Contains(t, output, "-c conda-forge") + require.Contains(t, output, "pythonocc-core") + require.Contains(t, output, "numpy=1.24") + require.Contains(t, output, "micromamba clean -a -y") + require.Contains(t, output, "site-packages") +} + +func TestCondaInstallsEmpty(t *testing.T) { + tmpDir := t.TempDir() + + conf := &config.Config{ + Build: &config.Build{ + PythonVersion: "3.11", + CondaPackages: []string{}, + }, + } + + command := dockertest.NewMockCommand() + client := registrytest.NewMockRegistryClient() + gen, err := NewStandardGenerator(conf, tmpDir, command, client, true) + require.NoError(t, err) + + output, err := gen.condaInstalls() + require.NoError(t, err) + require.Equal(t, "", output) +} + +func 
TestCondaInstallsMultipleChannels(t *testing.T) { + tmpDir := t.TempDir() + + conf := &config.Config{ + Build: &config.Build{ + PythonVersion: "3.11", + CondaPackages: []string{"biopython"}, + CondaChannels: []string{"conda-forge", "bioconda", "defaults"}, + }, + } + + command := dockertest.NewMockCommand() + client := registrytest.NewMockRegistryClient() + gen, err := NewStandardGenerator(conf, tmpDir, command, client, true) + require.NoError(t, err) + + output, err := gen.condaInstalls() + require.NoError(t, err) + require.Contains(t, output, "-c conda-forge") + require.Contains(t, output, "-c bioconda") + require.Contains(t, output, "-c defaults") +} + +func TestCondaInstallsArchitectureDetection(t *testing.T) { + tmpDir := t.TempDir() + + conf := &config.Config{ + Build: &config.Build{ + PythonVersion: "3.11", + CondaPackages: []string{"numpy"}, + CondaChannels: []string{"conda-forge"}, + }, + } + + command := dockertest.NewMockCommand() + client := registrytest.NewMockRegistryClient() + gen, err := NewStandardGenerator(conf, tmpDir, command, client, true) + require.NoError(t, err) + + output, err := gen.condaInstalls() + require.NoError(t, err) + // Verify architecture detection using uname -m + require.Contains(t, output, "uname -m") + require.Contains(t, output, "linux-64") + require.Contains(t, output, "linux-aarch64") + // Verify download from GitHub releases with version pinning + require.Contains(t, output, "github.com/mamba-org/micromamba-releases/releases/download/2.5.0-1") + require.Contains(t, output, "micromamba-${ARCH}") +} + +func TestCondaInstallsVersionPinned(t *testing.T) { + tmpDir := t.TempDir() + + conf := &config.Config{ + Build: &config.Build{ + PythonVersion: "3.11", + CondaPackages: []string{"numpy"}, + }, + } + + command := dockertest.NewMockCommand() + client := registrytest.NewMockRegistryClient() + gen, err := NewStandardGenerator(conf, tmpDir, command, client, true) + require.NoError(t, err) + + output, err := gen.condaInstalls() + 
require.NoError(t, err) + // Verify version is pinned (not using "latest") + require.Contains(t, output, "2.5.0-1") + require.NotContains(t, output, "latest") +}