2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
@@ -10,7 +10,7 @@ jobs:
     runs-on: ubuntu-latest
     strategy:
       matrix:
-        python-version: [ '3.9', '3.10', '3.11' ]
+        python-version: [ '3.10', '3.11' ]
     steps:
       - uses: actions/checkout@v3
       - name: Set up Python ${{ matrix.python-version }}
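The dropped 3.9 entry matches the raised `requires-python` floor in pyproject.toml below. A purely illustrative runtime guard restating that floor (not part of the repo):

```python
# Illustrative guard matching the new minimum (Python 3.9 is dropped);
# a restatement of requires-python = ">=3.10", not code from this PR.
import sys

if sys.version_info < (3, 10):
    raise RuntimeError("aind-data-transfer requires Python >= 3.10")
```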
4 changes: 2 additions & 2 deletions .github/workflows/tag_and_publish.yml
@@ -43,10 +43,10 @@ jobs:
       - uses: actions/checkout@v3
      - name: Pull latest changes
         run: git pull origin main
-      - name: Set up Python 3.9
+      - name: Set up Python 3.10
         uses: actions/setup-python@v2
         with:
-          python-version: 3.9
+          python-version: '3.10'
       - name: Install dependencies
         run: |
           pip install --upgrade setuptools wheel twine build
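The quotes on `'3.10'` are the fix worth calling out: unquoted `3.10` is a YAML float and loads as `3.1`, which setup-python cannot resolve (`3.9` happened to survive because the float round-trips). A quick demonstration, assuming PyYAML is available (it is a runtime dependency of this project):

```python
# Demonstration of the YAML gotcha the quotes avoid: an unquoted 3.10
# round-trips as the float 3.1.
import yaml  # assumption: PyYAML is installed

print(yaml.safe_load("python-version: 3.10"))    # {'python-version': 3.1}
print(yaml.safe_load("python-version: '3.10'"))  # {'python-version': '3.10'}
```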
2 changes: 1 addition & 1 deletion post_install.sh
@@ -1,4 +1,4 @@
 #!/bin/bash
 
-python -m pip install "git+https://github.com/fsspec/kerchunk" --no-cache-dir
+python -m pip install kerchunk==0.2.6 --no-cache-dir
 python -m pip install hdf5plugin --no-binary hdf5plugin --no-cache-dir
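Pinning kerchunk to a release instead of the git tip makes the install reproducible. A small sanity-check sketch, standard library only, that could confirm the pins took effect after the script runs:

```python
# Sketch: verify the packages installed by post_install.sh.
# Package names come from the script above; nothing else is assumed.
from importlib.metadata import version, PackageNotFoundError

for pkg in ("kerchunk", "hdf5plugin"):
    try:
        print(f"{pkg}=={version(pkg)}")
    except PackageNotFoundError:
        print(f"{pkg} is not installed")
```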
36 changes: 19 additions & 17 deletions pyproject.toml
100755 → 100644
@@ -6,29 +6,31 @@ build-backend = "setuptools.build_meta"
 name = "aind-data-transfer"
 description = "Services for compression and transfer of aind-data to the cloud"
 license = {text = "MIT"}
-requires-python = ">=3.9"
+requires-python = ">=3.10"
 classifiers = [
     "Programming Language :: Python :: 3"
 ]
 readme = "README.md"
 dynamic = ["version"]
 
 dependencies = [
-    'pandas==2.2.0',
-    's3fs==2024.3.1',
-    's3transfer[crt]==0.10.0',
-    'boto3[crt]==1.34.51',
-    'numpy==1.26.1',
-    'pyyaml==6.0.1',
-    'google-cloud-storage==2.12.0',
+    'pandas',
+    's3fs>=2024.6.1',
+    's3transfer[crt]',
+    'boto3[crt]',
+    'numpy<2.0.0',
+    'pyyaml',
+    'google-cloud-storage',
     'pyminizip==0.2.6',
     'aind-codeocean-api>=0.4.0',
-    'aind-data-schema==0.33.3',
+    'aind-data-schema==1.4.0',
+    'aind-data-schema-models==0.7.5',
     'aind-metadata-service[client]>=0.2.5',
     'tqdm==4.64.1',
     'aind-data-access-api[secrets]>=0.4.0',
+    'aind-metadata-mapper>=0.24.0',
     'toml==0.10.2',
-    'zarr==2.17.2',
+    'zarr==2.18.3',
     'numcodecs==0.11.0',
 ]
@@ -49,13 +51,13 @@ ephys = [
 ]
 imaging = [
     'argschema==3.0.4',
-    'dask==2024.4.1',
-    'distributed==2024.4.1',
-    'bokeh!=3.0.*,>=2.4.2',
-    'gcsfs==2024.3.1',
-    'xarray-multiscale==2.1.0',
-    'xarray==2024.05.0',
-    'parameterized==0.9.0',
+    'dask>=2024.12.1',
+    'distributed>=2024.12.1',
+    'bokeh',
+    'gcsfs>=2024.6.1',
+    'xarray-multiscale==1.2.0',
+    'xarray',
+    'parameterized',
     'ome-zarr==0.8.3',
     'chardet==5.1.0',
     'natsort==8.4.0',
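Most exact pins are relaxed, but NumPy 2.x stays excluded via `numpy<2.0.0`. A sketch of how that specifier behaves, assuming the `packaging` library is installed (`pip install packaging`):

```python
# Sketch: how a resolver evaluates the numpy pin above.
from packaging.specifiers import SpecifierSet

numpy_spec = SpecifierSet("<2.0.0")
print("1.26.4" in numpy_spec)  # True  - latest 1.x releases still allowed
print("2.0.0" in numpy_spec)   # False - NumPy 2 excluded until code is ported
```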
10 changes: 2 additions & 8 deletions scripts/processing_manifest.py
@@ -1,16 +1,10 @@
 import re
 from datetime import datetime
 from enum import Enum
 from typing import List, Optional
 
-from aind_data_schema.base import AindModel
-from aind_data_schema.models.organizations import Organization
-from aind_data_schema.models.modalities import Modality
-from aind_data_schema.core.data_description import (
-    Funding,
-    datetime_from_name_string,
-)
-from aind_data_schema.models.units import SizeUnit
+from aind_data_schema_models.organizations import Organization
+from aind_data_schema_models.units import SizeUnit
 from aind_data_schema.core.acquisition import AxisName, Immersion
 from pydantic import Field
 
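This is the pattern running through the whole PR: registry-style models (organizations, units, modalities, platforms) moved out of `aind_data_schema.models.*` into the separate `aind-data-schema-models` package. A minimal sketch of the new import style; the specific members accessed below are assumptions for illustration, not taken from this diff:

```python
# Post-migration imports; requires the aind-data-schema-models package.
from aind_data_schema_models.organizations import Organization
from aind_data_schema_models.units import SizeUnit

print(Organization.AIND.name)  # assumed registry entry
print(SizeUnit.GB)             # assumed unit member
```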
2 changes: 1 addition & 1 deletion scripts/s3_upload.py
@@ -14,7 +14,7 @@
 from aind_data_transfer.s3 import S3Uploader
 from aind_data_transfer.util import file_utils
 from aind_data_transfer.util.dask_utils import get_client
-from aind_data_transfer.util.file_utils import collect_filepaths, batch_files_by_size
+from aind_data_transfer.util.file_utils import batch_files_by_size
 
 
 LOG_FMT = "%(asctime)s %(message)s"
6 changes: 3 additions & 3 deletions src/aind_data_transfer/config_loader/base_config.py
@@ -11,8 +11,8 @@
 
 from aind_data_schema.core.data_description import build_data_name
 from aind_data_schema.core.processing import ProcessName
-from aind_data_schema.models.modalities import Modality
-from aind_data_schema.models.platforms import Platform
+from aind_data_schema_models.modalities import Modality
+from aind_data_schema_models.platforms import Platform
 from pydantic import (
     DirectoryPath,
     Field,
@@ -157,7 +157,7 @@ class ModalityConfigs(BaseSettings):
     # added to the Modality class
     _MODALITY_MAP: ClassVar = {
         m().abbreviation.upper().replace("-", "_"): m().abbreviation
-        for m in Modality._ALL
+        for m in Modality.ALL
     }
 
     # Optional number id to assign to modality config
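The rename from the private `Modality._ALL` to the public `Modality.ALL` tracks the new package's API. For context, a self-contained sketch of what the `_MODALITY_MAP` comprehension builds, using stand-in classes with hypothetical abbreviations rather than the real registry:

```python
# Stand-ins mimicking modality classes: instances expose `abbreviation`.
class Ecephys:
    abbreviation = "ecephys"

class BehaviorVideos:
    abbreviation = "behavior-videos"

ALL = (Ecephys, BehaviorVideos)  # stand-in for Modality.ALL

# Same shape as the comprehension above: UPPER_SNAKE key -> raw abbreviation.
modality_map = {
    m().abbreviation.upper().replace("-", "_"): m().abbreviation for m in ALL
}
print(modality_map)  # {'ECEPHYS': 'ecephys', 'BEHAVIOR_VIDEOS': 'behavior-videos'}
```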
27 changes: 23 additions & 4 deletions src/aind_data_transfer/jobs/basic_job.py
@@ -22,7 +22,7 @@
 from aind_data_schema.core.metadata import Metadata, MetadataStatus
 from aind_data_schema.core.procedures import Procedures
 from aind_data_schema.core.subject import Subject
-from aind_data_schema.models.modalities import Modality
+from aind_data_schema_models.modalities import Modality
 
 from aind_data_transfer import __version__
 from aind_data_transfer.config_loader.base_config import BasicUploadJobConfigs
@@ -102,12 +102,12 @@ def __core_metadata_fields():
         return all_model_fields
 
     @staticmethod
-    def __download_json(file_location: Path) -> dict:
+    def _download_json(file_location: Path) -> dict:
         with open(file_location, "r") as f:
             contents = json.load(f)
         return contents
 
-    def _initialize_metadata_record(self, temp_dir: Path):
+    def _initialize_metadata_record(self, temp_dir: Path, session=None, rig=None, acquisition=None):
         """Perform some metadata collection and validation before more
         time-consuming compression and upload steps."""
 
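The single-underscore rename is not cosmetic: double leading underscores trigger Python's name mangling, which hides the helper from subclasses. A short illustration of the mechanism:

```python
# Why __download_json -> _download_json matters: name mangling rewrites
# double-underscore attributes to _ClassName__attr, blocking subclass access.
class Base:
    @staticmethod
    def __helper():
        return "reached"

class Child(Base):
    def call(self):
        try:
            return self.__helper()  # compiled as _Child__helper -> AttributeError
        except AttributeError:
            return self._Base__helper()  # only reachable via the mangled name

print(Child().call())  # "reached"
```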
@@ -202,16 +202,35 @@ def _initialize_metadata_record(self, temp_dir: Path):
         data_description_metadata = data_description_metadata_0.model_obj
         del core_filename_map[data_description_filename]
 
+        # This can be updated again after the job is done.
+        processing_metadata0 = ProcessingMetadata.from_modalities_configs(
+            modality_configs=self.job_configs.modalities,
+            start_date_time=datetime.now(timezone.utc),
+            end_date_time=datetime.now(timezone.utc),
+            output_location=(
+                f"s3://{self.job_configs.s3_bucket}/"
+                f"{self.job_configs.s3_prefix}"
+            ),
+            processor_full_name=self.job_configs.processor_full_name,
+            code_url=self.job_configs.aind_data_transfer_repo_location,
+        )
+        processing_metadata0.write_to_json(path=temp_dir)
+        processing_metadata = processing_metadata0.model_obj
+
         # Update metadata record object
         self.metadata_record = Metadata(
             name=self.job_configs.s3_prefix,
             location=self.job_configs.s3_bucket,
             subject=subject_metadata,
             procedures=procedures_metadata,
             data_description=data_description_metadata,
+            processing=processing_metadata,
+            session=session,
+            rig=rig,
+            acquisition=acquisition
         )
         # For the remaining files in metadata dir, copy them over. We'll
-        # copy al the files regardless of whether they were generated from
+        # copy all the files regardless of whether they were generated from
         # a core model. For the core models, we can attach the contents to
         # the metadata record
         for file_name, file_path in metadata_in_folder_map.items():
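The new `session`, `rig`, and `acquisition` parameters let a caller attach those core models to the record instead of the `None` defaults. A hedged sketch of the assembled record with every core model stubbed as `None`; the field names and import path come from this diff, but whether validation accepts the stubs is library behavior not shown here:

```python
# Illustrative only; assumes aind-data-schema 1.4.0. Values are hypothetical.
from aind_data_schema.core.metadata import Metadata

record = Metadata(
    name="ecephys_123456_2024-01-01_10-00-00",  # hypothetical s3_prefix
    location="my-bucket",                        # hypothetical s3_bucket
    subject=None,
    procedures=None,
    data_description=None,
    processing=None,
    session=None,   # the new optional inputs default to None
    rig=None,
    acquisition=None,
)
print(record.name)
```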