From 190c53c675e98749c07b29e012c98a285d4dbec9 Mon Sep 17 00:00:00 2001 From: Simit Pradhan Date: Thu, 12 Mar 2026 17:08:00 +0530 Subject: [PATCH 1/4] Implement gsutil download for GCS URLs Add a new function to download files using gsutil for GCS URLs. --- src/openpi/shared/download.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/src/openpi/shared/download.py b/src/openpi/shared/download.py index 8868825d80..ba56ff4c6d 100644 --- a/src/openpi/shared/download.py +++ b/src/openpi/shared/download.py @@ -8,6 +8,7 @@ import stat import time import urllib.parse +import subprocess import filelock import fsspec @@ -83,7 +84,12 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl # Download the data to a local cache. logger.info(f"Downloading {url} to {local_path}") scratch_path = local_path.with_suffix(".partial") - _download_fsspec(url, scratch_path, **kwargs) + # Route openpi-assets through gsutil to avoid gcsfs auth issues with this bucket. + # All other gs:// URLs (e.g. big_vision) continue to use gcsfs as normal. 
+ if parsed.scheme == "gs" and parsed.netloc == "openpi-assets": + _download_gsutil(url, scratch_path) + else: + _download_fsspec(url, scratch_path, **kwargs) shutil.move(scratch_path, local_path) _ensure_permissions(local_path) @@ -97,6 +103,13 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl return local_path +def _download_gsutil(url: str, local_path: pathlib.Path) -> None: + """Download a file or directory from GCS using gsutil.""" + local_path.parent.mkdir(parents=True, exist_ok=True) + subprocess.run( + ["gsutil", "-m", "cp", "-r", url, str(local_path)], + check=True, + ) def _download_fsspec(url: str, local_path: pathlib.Path, **kwargs) -> None: """Download a file from a remote filesystem to the local cache, and return the local path.""" From 78c2d352f13ce9c2a1a60072f2958adcda7c8cfc Mon Sep 17 00:00:00 2001 From: Simit Pradhan Date: Thu, 12 Mar 2026 17:27:16 +0530 Subject: [PATCH 2/4] Fall back to default download method if gsutil is not available - Fall back to default download method if gsutil is not available - Update _download_gsutil to accept additional kwargs for flexibility in downloading files. --- src/openpi/shared/download.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/openpi/shared/download.py b/src/openpi/shared/download.py index ba56ff4c6d..524921b2f5 100644 --- a/src/openpi/shared/download.py +++ b/src/openpi/shared/download.py @@ -87,7 +87,7 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl # Route openpi-assets through gsutil to avoid gcsfs auth issues with this bucket. # All other gs:// URLs (e.g. big_vision) continue to use gcsfs as normal. 
if parsed.scheme == "gs" and parsed.netloc == "openpi-assets": - _download_gsutil(url, scratch_path) + _download_gsutil(url, scratch_path, **kwargs) else: _download_fsspec(url, scratch_path, **kwargs) @@ -103,8 +103,12 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl return local_path -def _download_gsutil(url: str, local_path: pathlib.Path) -> None: - """Download a file or directory from GCS using gsutil.""" +def _download_gsutil(url: str, local_path: pathlib.Path, **kwargs) -> None: + """Download a file or directory from GCS using gsutil if available, otherwise fall back to gcsfs.""" + if shutil.which("gsutil") is None: + logger.warning("gsutil not found, falling back to gcsfs. This may fail if GCP credentials are not configured correctly.") + _download_fsspec(url, local_path, **kwargs) + return local_path.parent.mkdir(parents=True, exist_ok=True) subprocess.run( ["gsutil", "-m", "cp", "-r", url, str(local_path)], From b660f4d529273f496478754527138194425f3448 Mon Sep 17 00:00:00 2001 From: Simit Pradhan Date: Thu, 12 Mar 2026 18:00:44 +0530 Subject: [PATCH 3/4] Fix local_path creation and update gsutil command --- src/openpi/shared/download.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/openpi/shared/download.py b/src/openpi/shared/download.py index 524921b2f5..878a7ba835 100644 --- a/src/openpi/shared/download.py +++ b/src/openpi/shared/download.py @@ -109,9 +109,9 @@ def _download_gsutil(url: str, local_path: pathlib.Path, **kwargs) -> None: logger.warning("gsutil not found, falling back to gcsfs. 
This may fail if GCP credentials are not configured correctly.") _download_fsspec(url, local_path, **kwargs) return - local_path.parent.mkdir(parents=True, exist_ok=True) + local_path.mkdir(parents=True, exist_ok=True) subprocess.run( - ["gsutil", "-m", "cp", "-r", url, str(local_path)], + ["gsutil", "-m", "cp", "-r", f"{url}/*", str(local_path)], check=True, ) From 2601fa260a20b502d2252cb2c4e2fe8dbff322b9 Mon Sep 17 00:00:00 2001 From: Simit Pradhan Date: Fri, 13 Mar 2026 10:43:47 +0530 Subject: [PATCH 4/4] lint code --- src/openpi/shared/download.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/openpi/shared/download.py b/src/openpi/shared/download.py index 878a7ba835..2276507e6e 100644 --- a/src/openpi/shared/download.py +++ b/src/openpi/shared/download.py @@ -6,9 +6,9 @@ import re import shutil import stat +import subprocess import time import urllib.parse -import subprocess import filelock import fsspec @@ -103,10 +103,13 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl return local_path + def _download_gsutil(url: str, local_path: pathlib.Path, **kwargs) -> None: """Download a file or directory from GCS using gsutil if available, otherwise fall back to gcsfs.""" if shutil.which("gsutil") is None: - logger.warning("gsutil not found, falling back to gcsfs. This may fail if GCP credentials are not configured correctly.") + logger.warning( + "gsutil not found, falling back to gcsfs. This may fail if GCP credentials are not configured correctly." + ) _download_fsspec(url, local_path, **kwargs) return local_path.mkdir(parents=True, exist_ok=True) @@ -115,6 +118,7 @@ def _download_gsutil(url: str, local_path: pathlib.Path, **kwargs) -> None: check=True, ) + def _download_fsspec(url: str, local_path: pathlib.Path, **kwargs) -> None: """Download a file from a remote filesystem to the local cache, and return the local path.""" fs, _ = fsspec.core.url_to_fs(url, **kwargs)