Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 21 additions & 7 deletions app/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@

from app.db.types import StorageType

# Binary size units expressed in bytes; each unit is 1024 times the previous one.
KB = 2**10
MB = 1024 * KB
GB = 1024 * MB
TB = 1024 * GB


class Settings(BaseSettings):
model_config = SettingsConfigDict(
Expand Down Expand Up @@ -46,18 +52,26 @@ class Settings(BaseSettings):
AUTH_CACHE_MAX_TTL: int = 300 # seconds
AUTH_CACHE_INFO: bool = False

S3_PRESIGNED_URL_NETLOC: str | None = None # to override the presigned url hostname and port
S3_MULTIPART_THRESHOLD: int = 5 * 1024**2 # bytes # TODO: decide an appropriate value
# to override the presigned url hostname and port when running locally
S3_PRESIGNED_URL_NETLOC: str | None = None
S3_PRESIGNED_URL_EXPIRATION: int = 6 * 3600 # 6 hours

S3_MULTIPART_UPLOAD_MAX_SIZE: int = 1024**4 # 1TB
S3_MULTIPART_UPLOAD_MIN_PART_SIZE: int = 5 * 1024**2
S3_MULTIPART_UPLOAD_MAX_PART_SIZE: int = 5 * 1024**3
# upload_fileobj: data flows through the service
S3_MULTIPART_UPLOAD_THRESHOLD: int = 100 * MB
S3_MULTIPART_UPLOAD_CHUNKSIZE: int = 10 * MB
S3_MULTIPART_UPLOAD_MAX_CONCURRENCY: int = 10
# copy: server-side, data stays in S3
S3_MULTIPART_COPY_THRESHOLD: int = 5 * GB
S3_MULTIPART_COPY_CHUNKSIZE: int = 1 * GB
S3_MULTIPART_COPY_MAX_CONCURRENCY: int = 10

S3_MULTIPART_UPLOAD_MAX_SIZE: int = 1 * TB
S3_MULTIPART_UPLOAD_MIN_PART_SIZE: int = 5 * MB
S3_MULTIPART_UPLOAD_MAX_PART_SIZE: int = 5 * GB
S3_MULTIPART_UPLOAD_MIN_PARTS: int = 1
S3_MULTIPART_UPLOAD_MAX_PARTS: int = 10_000
S3_MULTIPART_UPLOAD_DEFAULT_PARTS: int = 100

API_ASSET_POST_MAX_SIZE: int = 150 * 1024**2 # bytes # TODO: decide an appropriate value
API_ASSET_POST_MAX_SIZE: int = 150 * MB
PAGINATION_DEFAULT_PAGE_SIZE: int = 30
PAGINATION_MAX_PAGE_SIZE: int = 200

Expand Down
12 changes: 12 additions & 0 deletions app/db/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,17 +6,29 @@
from sqlalchemy.orm import DeclarativeBase, InstrumentedAttribute, RelationshipProperty

from app.db.model import (
Activity,
Base,
CellMorphologyProtocol,
Entity,
ETypeClassification,
Identifiable,
LocationMixin,
MeasurableEntityMixin,
MTypeClassification,
)
from app.db.types import CellMorphologyGenerationType, EntityType, ResourceType
from app.logger import L
from app.schemas.utils import NOT_SET

# Union of the model base classes listed in PUBLISHABLE_BASE_CLASSES below;
# used as the element type of that list.
PublishableBaseModel = (
    Activity | Entity | ETypeClassification | MTypeClassification
)

# The same four base classes as concrete class objects, in the same order as the union.
PUBLISHABLE_BASE_CLASSES: list[type[PublishableBaseModel]] = [
    Activity,
    Entity,
    ETypeClassification,
    MTypeClassification,
]

MEASURABLE_ENTITIES: dict[str, type[Entity]] = {
mapper.class_.__tablename__: mapper.class_
for mapper in Base.registry.mappers
Expand Down
88 changes: 85 additions & 3 deletions app/routers/admin.py
Original file line number Diff line number Diff line change
@@ -1,19 +1,33 @@
import uuid
from typing import Annotated

from fastapi import APIRouter
from fastapi import APIRouter, Query

from app.config import storages
from app.db.types import StorageType
from app.db.utils import RESOURCE_TYPE_TO_CLASS
from app.dependencies.common import PaginationQuery
from app.dependencies.db import RepoGroupDep, SessionDep
from app.dependencies.s3 import StorageClientFactoryDep
from app.filters.asset import AssetFilterDep
from app.queries.common import router_admin_delete_one
from app.schemas.asset import (
AssetRead,
)
from app.schemas.publish import ChangeProjectVisibilityResponse
from app.schemas.routers import DeleteResponse
from app.schemas.types import ListResponse
from app.service import admin as admin_service, asset as asset_service
from app.utils.routers import EntityRoute, ResourceRoute, entity_route_to_type, route_to_type
from app.service import (
admin as admin_service,
asset as asset_service,
publish as publish_service,
)
from app.utils.routers import (
EntityRoute,
ResourceRoute,
entity_route_to_type,
route_to_type,
)

router = APIRouter(
prefix="/admin",
Expand Down Expand Up @@ -89,3 +103,71 @@ def delete_entity_asset(
)
# Note: Asset storage object is deleted via app.db.events
return asset


@router.post("/publish-project/{project_id}")
def publish_project(
    db: SessionDep,
    storage_client_factory: StorageClientFactoryDep,
    *,
    project_id: uuid.UUID,
    max_assets: Annotated[
        int | None, Query(description="Limit the number of assets to be made public.")
    ] = None,
    dry_run: Annotated[
        bool, Query(description="Simulate the operation without making any change.")
    ],
) -> ChangeProjectVisibilityResponse:
    """Publish the content of a project.

    This endpoint is used to make public the resources in a project.

    It's recommended to call the endpoint with dry_run=true before running it with dry_run=false.

    If max_assets is specified, the endpoint should be called multiple times until the response
    says that the operation is completed.
    """
    # The operation always targets the internal S3 storage entry; the client
    # is built from that same storage definition.
    internal_storage = storages[StorageType.aws_s3_internal]
    # Delegate to the shared visibility service with public=True (publish).
    return publish_service.set_project_visibility(
        db=db,
        s3_client=storage_client_factory(internal_storage),
        project_id=project_id,
        storage=internal_storage,
        max_assets=max_assets,
        dry_run=dry_run,
        public=True,
    )


@router.post("/unpublish-project/{project_id}")
def unpublish_project(
    db: SessionDep,
    storage_client_factory: StorageClientFactoryDep,
    project_id: uuid.UUID,
    *,
    max_assets: Annotated[
        int | None, Query(description="Limit the number of assets to be made private.")
    ] = None,
    # Documented with Query(...) like the publish endpoint, so both endpoints
    # describe dry_run in the generated API docs; it is still a required bool.
    dry_run: Annotated[
        bool, Query(description="Simulate the operation without making any change.")
    ],
) -> ChangeProjectVisibilityResponse:
    """Unpublish the content of a project.

    This endpoint is used to make private the resources in a project.

    It's recommended to call the endpoint with dry_run=true before running it with dry_run=false.

    If max_assets is specified, the endpoint should be called multiple times until the response
    says that the operation is completed.
    """
    # The operation always targets the internal S3 storage entry; the client
    # is built from that same storage definition.
    storage = storages[StorageType.aws_s3_internal]
    s3_client = storage_client_factory(storage)
    # Delegate to the shared visibility service with public=False (unpublish).
    return publish_service.set_project_visibility(
        db=db,
        s3_client=s3_client,
        project_id=project_id,
        storage=storage,
        max_assets=max_assets,
        dry_run=dry_run,
        public=False,
    )
66 changes: 66 additions & 0 deletions app/schemas/publish.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import uuid
from typing import Annotated

from pydantic import BaseModel, Field


class MoveFileResult(BaseModel):
    # Per-file result record: a required size plus an optional error message.
    # (Comments are used instead of a class docstring so the generated schema
    # is unchanged.)
    size: Annotated[
        int,
        Field(description="Size of the file"),
    ]
    # Defaults to None, i.e. no error recorded for the file.
    error: str | None = None


class MoveDirectoryResult(BaseModel):
    # Running totals for a directory, accumulated one file result at a time.
    size: Annotated[
        int,
        Field(description="Size of moved files in the directory"),
    ] = 0
    file_count: Annotated[
        int,
        Field(description="Number of moved files in the directory"),
    ] = 0
    # Error messages collected from the individual file results.
    errors: list[str] = []

    def update_from_file_result(self, file_result: MoveFileResult) -> None:
        """Fold one file result into the directory totals.

        The file is always counted and its size always added; its error
        message is recorded only when it is set (truthy).
        """
        self.file_count += 1
        self.size += file_result.size
        if error := file_result.error:
            self.errors.append(error)


class MoveAssetsResult(BaseModel):
    # Aggregated totals over all processed assets, whether single files or directories.
    total_size: Annotated[
        int,
        Field(description="Total size of moved files"),
    ] = 0
    file_count: Annotated[
        int,
        Field(description="Number of moved files"),
    ] = 0
    asset_count: Annotated[
        int,
        Field(description="Number of updated assets"),
    ] = 0
    # Error messages gathered from file and directory results.
    errors: list[str] = []

    def update_from_file_result(self, file_result: MoveFileResult) -> None:
        """Account for an asset made of a single file.

        Adds the file size, counts one file and one asset, and records the
        error message when it is set (truthy).
        """
        self.asset_count += 1
        self.file_count += 1
        self.total_size += file_result.size
        if error := file_result.error:
            self.errors.append(error)

    def update_from_directory_result(self, directory_result: MoveDirectoryResult) -> None:
        """Account for an asset made of a directory.

        Adds the directory size and file count, counts one asset, and appends
        all the errors collected for the directory.
        """
        self.asset_count += 1
        self.file_count += directory_result.file_count
        self.total_size += directory_result.size
        self.errors.extend(directory_result.errors)


class ChangeProjectVisibilityResponse(BaseModel):
    """Successful response to the publish or unpublish operation."""

    # --- what was requested and the outcome summary ---
    message: Annotated[
        str,
        Field(description="A human-readable message describing the result"),
    ]
    project_id: Annotated[
        uuid.UUID,
        Field(description="ID of the project"),
    ]
    public: Annotated[
        bool,
        Field(description="Whether the content is now public or private"),
    ]

    # --- counters ---
    resource_count: Annotated[
        int,
        Field(description="Number of updated resources (activities, entities, classifications)"),
    ]
    move_assets_result: Annotated[
        MoveAssetsResult,
        Field(description="Result of the assets movement"),
    ]

    # --- execution flags ---
    dry_run: Annotated[
        bool,
        Field(description="True if the operation has been simulated only"),
    ]
    completed: Annotated[
        bool,
        Field(
            description=(
                "Whether the assets have been fully updated. "
                "It may be False if `max_assets` have been specified, "
                "and there are still assets to be moved."
            )
        ),
    ]
Loading
Loading