Skip to content

Commit e6b0441

Browse files
authored
Implement gsutil download for GCS URLs (#901)
Add a new function to download files using gsutil for GCS URLs.
2 parents e01d229 + 2601fa2 commit e6b0441

1 file changed

Lines changed: 22 additions & 1 deletion

File tree

src/openpi/shared/download.py

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66
import re
77
import shutil
88
import stat
9+
import subprocess
910
import time
1011
import urllib.parse
1112

@@ -83,7 +84,12 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl
8384
# Download the data to a local cache.
8485
logger.info(f"Downloading {url} to {local_path}")
8586
scratch_path = local_path.with_suffix(".partial")
86-
_download_fsspec(url, scratch_path, **kwargs)
87+
# Route openpi-assets through gsutil to avoid gcsfs auth issues with this bucket.
88+
# All other gs:// URLs (e.g. big_vision) continue to use gcsfs as normal.
89+
if parsed.scheme == "gs" and parsed.netloc == "openpi-assets":
90+
_download_gsutil(url, scratch_path, **kwargs)
91+
else:
92+
_download_fsspec(url, scratch_path, **kwargs)
8793

8894
shutil.move(scratch_path, local_path)
8995
_ensure_permissions(local_path)
@@ -98,6 +104,21 @@ def maybe_download(url: str, *, force_download: bool = False, **kwargs) -> pathl
98104
return local_path
99105

100106

107+
def _download_gsutil(url: str, local_path: pathlib.Path, **kwargs) -> None:
    """Download a file or directory from GCS using gsutil if available, otherwise fall back to gcsfs.

    The URL is first treated as a directory-like prefix whose contents are copied into
    ``local_path``. If that copy fails without writing anything, the URL is retried as a
    single object and copied to ``local_path`` as a file.

    Args:
        url: The ``gs://`` URL of an object or of a directory-like prefix.
        local_path: Destination path on the local filesystem.
        **kwargs: Forwarded to ``_download_fsspec`` only when gsutil is not installed;
            the gsutil code path does not use them.

    Raises:
        subprocess.CalledProcessError: If gsutil exits with a non-zero status.
    """
    if shutil.which("gsutil") is None:
        logger.warning(
            "gsutil not found, falling back to gcsfs. This may fail if GCP credentials are not configured correctly."
        )
        _download_fsspec(url, local_path, **kwargs)
        return

    # A trailing slash would turn the wildcard below into "<url>//*", which matches nothing.
    source = url.rstrip("/")

    local_path.mkdir(parents=True, exist_ok=True)
    try:
        subprocess.run(
            ["gsutil", "-m", "cp", "-r", f"{source}/*", str(local_path)],
            check=True,
        )
    except subprocess.CalledProcessError:
        # If anything was written, this was a real (partial) directory download failure:
        # surface it unchanged rather than masking it with a second attempt.
        if any(local_path.iterdir()):
            raise
    else:
        return

    # Nothing was copied: "<url>/*" matches no objects when the URL names a single object.
    # Remove the empty placeholder directory so gsutil can write a file at local_path.
    local_path.rmdir()
    subprocess.run(
        ["gsutil", "cp", source, str(local_path)],
        check=True,
    )
120+
121+
101122
def _download_fsspec(url: str, local_path: pathlib.Path, **kwargs) -> None:
102123
"""Download a file from a remote filesystem to the local cache, and return the local path."""
103124
fs, _ = fsspec.core.url_to_fs(url, **kwargs)

0 commit comments

Comments
 (0)