From 93864950c0b35c3650050f196eca47e8b5f43abd Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Mon, 28 Jul 2025 22:04:20 -0700 Subject: [PATCH 1/7] first draft of allowing S3 as a backend --- constants.py | 8 ++ core/management/commands/importrois.py | 143 ++++++++++++++++++++----- core/models.py | 31 +++++- docker-compose.prod.yml | 2 + docker-compose.yml | 2 + env.example | 4 + photic/settings.py | 4 + requirements.txt | 1 + services/__init__.py | 0 services/s3_service.py | 9 ++ 10 files changed, 173 insertions(+), 31 deletions(-) create mode 100644 constants.py create mode 100644 services/__init__.py create mode 100644 services/s3_service.py diff --git a/constants.py b/constants.py new file mode 100644 index 0000000..ab3bd37 --- /dev/null +++ b/constants.py @@ -0,0 +1,8 @@ +from enum import Enum + +ALLOWED_FILE_TYPES = ['.png', '.jpg'] +S3_DELIMITER = "/" + +class StorageOrigin(Enum): + LOCAL = 'local' + S3 = 's3' \ No newline at end of file diff --git a/core/management/commands/importrois.py b/core/management/commands/importrois.py index a898127..686b03d 100644 --- a/core/management/commands/importrois.py +++ b/core/management/commands/importrois.py @@ -1,67 +1,158 @@ import os +import boto3 from django.core.management.base import BaseCommand, CommandError from django.contrib.auth.models import User from core.models import ROI, Annotation, ImageCollection, Label +from services.s3_service import S3Service +from constants import StorageOrigin, ALLOWED_FILE_TYPES, S3_DELIMITER + + class Command(BaseCommand): help = 'import rois' def add_arguments(self, parser): - parser.add_argument('directory', type=str, help='directory containing images') + parser.add_argument('directory', type=str, help='directory (or prefix if using S3) containing images') parser.add_argument('-c','--collection', type=str, help='image collection to create or add images to') + parser.add_argument('-b', '--bucket', type=str, help='the bucket when importing from S3') 
parser.add_argument('-u','--user', type=str, help='username for any created annotations (user must exist)') + origin_choices = [StorageOrigin.LOCAL.value, StorageOrigin.S3.value,] + parser.add_argument('-o','--origin', type=str, choices=origin_choices, default='local', help='storage type to use (local or s3)') + + def scan_local(self, directory): + unlabeled = [] + labeled = {} + folders = [] + + # First, loop through the directory and sort entries into unlabeled files and top level folders + for entry in os.listdir(directory): + name, ext = os.path.splitext(entry) + if ext in ALLOWED_FILE_TYPES: + unlabeled.append(entry) + continue + + path = os.path.join(directory, entry) + if os.path.isdir(path): + folders.append(entry) + continue + + # For each top level folder, use that as the label and get all the files inside + for folder in folders: + labeled[folder] = [] + + path = os.path.join(directory, folder) + for entry in os.listdir(path): + name, ext = os.path.splitext(entry) + if ext not in ALLOWED_FILE_TYPES: + continue + + labeled[folder].append(entry) + + return unlabeled, labeled + + # TODO: Handle/check using a subfolder as the directory + def scan_s3(self, s3_client, bucket, directory): + unlabeled = [] + labeled = {} + folders = [] + + # TODO: Questions for the group. Do we need to support local and S3 at the same time? or just one or the other? 
+ # TODO: The method may need to be "list_objects" instead of "list_objects_v2" due to Vast permissions + paginator = s3_client.get_paginator('list_objects_v2') + + for page in paginator.paginate(Bucket=bucket, Delimiter=S3_DELIMITER, Prefix=directory): + for cp in page.get("CommonPrefixes", []): + folder = cp.get("Prefix") + folders.append(folder) + + for obj in page.get("Contents", []): + filename = obj['Key'] + + # Ignore folders and files within folders + if filename.endswith('/'): + continue + + name, ext = os.path.splitext(filename) + if ext not in ALLOWED_FILE_TYPES: + continue + + unlabeled.append(filename) + + for folder in folders: + key = folder.rstrip("/") + labeled[key] = [] + prefix = os.path.join(directory, folder) + for page in paginator.paginate(Bucket=bucket, Delimiter=S3_DELIMITER, Prefix=prefix): + + for obj in page.get("Contents", []): + # TODO: If we're not in root, we might need to also lstrip() the directory? + filename = obj['Key'].lstrip(prefix) + + # Ignore subfolders and files within subfolders + if "/" in filename: + continue + + name, ext = os.path.splitext(filename) + if ext not in ALLOWED_FILE_TYPES: + continue + + labeled[key].append(filename) + + return unlabeled, labeled + def handle(self, *args, **options): # handle arguments directory = options['directory'] collection_name = options.get('collection') username = options.get('user') + origin = options.get('origin') + bucket = options.get('bucket') + s3_client = S3Service.get_client() if origin == StorageOrigin.S3.value else None + # validate arguments - if not os.path.exists(directory): + + # Only verify the path physically exists when using local storage + if origin == StorageOrigin.LOCAL.value and not os.path.exists(directory): raise CommandError('specified directory does not exist') + + # When using S3 for storage, a bucket is required + if origin == StorageOrigin.S3.value and (bucket or "") == "": + raise CommandError('bucket must be specified') + user = None if username: try: 
user = User.objects.get(username=username) except: raise CommandError(f'unable to retrieve user {username}') + collection = None if collection_name is not None: - collection, created = ImageCollection.objects.get_or_create( - name=collection_name) - # scan directory and one level of subdirectories - def scan(dir): - result = [] - for fn in os.listdir(dir): - name, ext = os.path.splitext(fn) - if ext not in ['.png', '.jpg']: - continue - result.append(fn) - return result - unlabeled = scan(directory) - labeled = {} - for n in os.listdir(directory): - if os.path.isdir(os.path.join(directory, n)): - label = n - label_dir_path = os.path.join(directory, n) - labeled[label] = scan(label_dir_path) + collection, _ = ImageCollection.objects.get_or_create(name=collection_name) + + if origin == StorageOrigin.S3.value: + unlabeled, labeled = self.scan_s3(s3_client, bucket, directory) + else: + unlabeled, labeled = self.scan_local(directory) + if len(labeled) > 0 and not user: raise CommandError('labeled ROIs found but no username specified') + print(f'found {len(unlabeled)} unlabeled images and {len(labeled)} label directories') + # now create ROI records in the database print(f'importing {len(unlabeled)} unlabeled ROIs...') for roi_filename in unlabeled: path = os.path.join(directory, roi_filename) - roi = ROI.objects.create_or_update_roi(path, collection=collection) + _ = ROI.objects.create_or_update_roi(path, collection=collection, origin=origin, bucket=bucket, s3_client=s3_client) + for label_name, rois in labeled.items(): print(f'importing {len(rois)} ROIs labeled "{label_name}"...') - label, created = Label.objects.get_or_create(name=label_name) + label, _ = Label.objects.get_or_create(name=label_name) for roi_filename in rois: roi_path = os.path.join(directory, label_name, roi_filename) - roi = ROI.objects.create_or_update_roi(roi_path, collection=collection) + roi = ROI.objects.create_or_update_roi(roi_path, collection=collection, origin=origin, bucket=bucket, 
s3_client=s3_client) Annotation.objects.create_or_verify(roi, label, user) - - - diff --git a/core/models.py b/core/models.py index a6d925b..c6eb77e 100644 --- a/core/models.py +++ b/core/models.py @@ -1,4 +1,5 @@ import os +import io from datetime import datetime import json @@ -11,6 +12,8 @@ from django.utils import timezone from PIL import Image +import boto3 +from constants import StorageOrigin class ROIQuerySet(models.QuerySet): @@ -61,11 +64,17 @@ class ROIManager(models.Manager): def get_queryset(self): return ROIQuerySet(self.model, using=self._db) - def create_or_update_roi(self, path, collection=None): + def create_or_update_roi(self, path, collection=None, origin='', bucket=None, s3_client=None): if not path.endswith('.png') and not path.endswith('.jpg'): raise NameError(f'{path} is not the path to a ROI image') roi_id = os.path.basename(path)[:-4] # we know it ends with a 3-character image extension + # TODO: For testing - outside of loop so it always runs + # width, height = self.calculate_dimensions(path, origin, bucket, s3_client) + + # TODO: Remove debugging line + # print(f"- {path} is {width}x{height}") + with transaction.atomic(): try: roi = self.get(roi_id=roi_id) @@ -76,8 +85,7 @@ def create_or_update_roi(self, path, collection=None): if not roi.collections.filter(id=collection.id).exists(): roi.collections.add(collection) except ROI.DoesNotExist: - with Image.open(path) as image: - width, height = image.size + width, height = self.calculate_dimensions(path, origin, bucket, s3_client) roi = self.create(roi_id=roi_id, width=width, height=height, path=path) if collection is not None: roi.collections.add(collection) @@ -89,15 +97,28 @@ def with_label(self, label): def unlabeled(self): return self.get_queryset().unlabeled() + def calculate_dimensions(self, path, origin, bucket=None, s3_client=None): + try: + if origin == StorageOrigin.S3.value: + response = s3_client.get_object(Bucket=bucket, Key=path) + data = response["Body"].read() + + with 
Image.open(io.BytesIO(data)) as image: + return image.size + else: + with Image.open(path) as image: + return image.size + except Exception as e: + return 0, 0 + class ROI(models.Model): roi_id = models.CharField(max_length=255, unique=True) width = models.IntegerField() height = models.IntegerField() path = models.CharField(max_length=512) - winning_annotation = models.ForeignKey('Annotation', on_delete=models.CASCADE, null=True,\ + winning_annotation = models.ForeignKey('Annotation', on_delete=models.CASCADE, null=True, \ related_name='associated_roi') - objects = ROIManager() @property diff --git a/docker-compose.prod.yml b/docker-compose.prod.yml index 90b7f9e..d53c3a3 100644 --- a/docker-compose.prod.yml +++ b/docker-compose.prod.yml @@ -3,6 +3,8 @@ version: '3.9' services: web: image: harbor-registry.whoi.edu/photic/photic_web:1.0 + env_file: + - .env command: python manage.py runserver 0.0.0.0:8000 volumes: - ${ROI_PATH}:/rois diff --git a/docker-compose.yml b/docker-compose.yml index 33edd95..6946544 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -16,6 +16,8 @@ services: context: . 
command: python manage.py runserver 0.0.0.0:8000 + env_file: + - .env volumes: - .:/app - ${ROI_PATH}:/rois diff --git a/env.example b/env.example index 164264e..136362d 100644 --- a/env.example +++ b/env.example @@ -19,3 +19,7 @@ POSTGRES_DATA_PATH=/srv/postgresql/data # Location of SSL certificate files SSL_CERT=/etc/ssl/example.crt SSL_KEY=/etc/ssl/example.key + +# Credentials for S3/Vast +S3_ACCESS_KEY= +S3_SECRET_KEY= \ No newline at end of file diff --git a/photic/settings.py b/photic/settings.py index 29923f8..bed6f04 100644 --- a/photic/settings.py +++ b/photic/settings.py @@ -135,6 +135,10 @@ LOGIN_REDIRECT_URL = "/" LOGIN_URL = "/manage/login" +# S3/Vast credentials +S3_ACCESS_KEY = os.environ.get('S3_ACCESS_KEY') +S3_SECRET_KEY = os.environ.get('S3_SECRET_KEY') + try: from .local_settings import * except ImportError as e: diff --git a/requirements.txt b/requirements.txt index df2b4d8..7321559 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,5 @@ Django==5.2.3 psycopg==3.2.9 Pillow +boto3==1.38.46 git+https://github.com/joefutrelle/pyifcb \ No newline at end of file diff --git a/services/__init__.py b/services/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/services/s3_service.py b/services/s3_service.py new file mode 100644 index 0000000..335324b --- /dev/null +++ b/services/s3_service.py @@ -0,0 +1,9 @@ +from django.conf import settings +import boto3 + +class S3Service: + @staticmethod + def get_client(): + session = boto3.session.Session() + + return session.client("s3", aws_access_key_id=settings.S3_ACCESS_KEY, aws_secret_access_key=settings.S3_SECRET_KEY) \ No newline at end of file From f9cc4b7dcfae9de4377697c0887d9d98b5366d4d Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Sun, 10 Aug 2025 21:16:30 -0700 Subject: [PATCH 2/7] allow starting within a subfolder for S3 images --- core/management/commands/importrois.py | 38 ++++++++++++++++++++------ core/models.py | 5 +++- 2 files changed, 34 insertions(+), 9 
deletions(-) diff --git a/core/management/commands/importrois.py b/core/management/commands/importrois.py index 686b03d..ed1d1c5 100644 --- a/core/management/commands/importrois.py +++ b/core/management/commands/importrois.py @@ -53,19 +53,21 @@ def scan_local(self, directory): return unlabeled, labeled - # TODO: Handle/check using a subfolder as the directory def scan_s3(self, s3_client, bucket, directory): unlabeled = [] labeled = {} folders = [] - # TODO: Questions for the group. Do we need to support local and S3 at the same time? or just one or the other? # TODO: The method may need to be "list_objects" instead of "list_objects_v2" due to Vast permissions paginator = s3_client.get_paginator('list_objects_v2') for page in paginator.paginate(Bucket=bucket, Delimiter=S3_DELIMITER, Prefix=directory): for cp in page.get("CommonPrefixes", []): folder = cp.get("Prefix") + + if directory != "": + folder = folder.removeprefix(directory) + folders.append(folder) for obj in page.get("Contents", []): @@ -75,6 +77,10 @@ def scan_s3(self, s3_client, bucket, directory): if filename.endswith('/'): continue + # Remove the directory/path if there is one + if directory != "": + filename = filename.removeprefix(directory) + name, ext = os.path.splitext(filename) if ext not in ALLOWED_FILE_TYPES: continue @@ -85,16 +91,20 @@ def scan_s3(self, s3_client, bucket, directory): key = folder.rstrip("/") labeled[key] = [] prefix = os.path.join(directory, folder) + for page in paginator.paginate(Bucket=bucket, Delimiter=S3_DELIMITER, Prefix=prefix): for obj in page.get("Contents", []): - # TODO: If we're not in root, we might need to also lstrip() the directory? 
- filename = obj['Key'].lstrip(prefix) + filename = obj['Key'].removeprefix(prefix) # Ignore subfolders and files within subfolders if "/" in filename: continue + # Remove the directory/path if there is one + if directory != "": + filename = filename.removeprefix(directory) + name, ext = os.path.splitext(filename) if ext not in ALLOWED_FILE_TYPES: continue @@ -122,6 +132,17 @@ def handle(self, *args, **options): if origin == StorageOrigin.S3.value and (bucket or "") == "": raise CommandError('bucket must be specified') + # For S3, if the user wants to look in root, the options are a bit unclear so we should allow them to pass an empty + # string (with ""), or a single slash. However, as far as AWS is concerned, directory in this case should be + # set to an empty string (slash will not work properly) + if origin == StorageOrigin.S3.value and directory == "/": + directory = "" + + # For S3, if the user entered a directory, it must end in a trailing slash. Rather than require it, we can just + # add one if it's not there + if origin == StorageOrigin.S3.value and directory != "" and not directory.endswith("/"): + directory += "/" + user = None if username: try: @@ -144,10 +165,11 @@ def handle(self, *args, **options): print(f'found {len(unlabeled)} unlabeled images and {len(labeled)} label directories') # now create ROI records in the database - print(f'importing {len(unlabeled)} unlabeled ROIs...') - for roi_filename in unlabeled: - path = os.path.join(directory, roi_filename) - _ = ROI.objects.create_or_update_roi(path, collection=collection, origin=origin, bucket=bucket, s3_client=s3_client) + if len(unlabeled) > 0: + print(f'importing {len(unlabeled)} unlabeled ROIs...') + for roi_filename in unlabeled: + path = os.path.join(directory, roi_filename) + _ = ROI.objects.create_or_update_roi(path, collection=collection, origin=origin, bucket=bucket, s3_client=s3_client) for label_name, rois in labeled.items(): print(f'importing {len(rois)} ROIs labeled "{label_name}"...')
diff --git a/core/models.py b/core/models.py index c6eb77e..1d4170d 100644 --- a/core/models.py +++ b/core/models.py @@ -72,7 +72,8 @@ def create_or_update_roi(self, path, collection=None, origin='', bucket=None, s3 # TODO: For testing - outside of loop so it always runs # width, height = self.calculate_dimensions(path, origin, bucket, s3_client) - # TODO: Remove debugging line + # TODO: Remove debugging lines + # print(f"- ROI ID: {roi_id}") # print(f"- {path} is {width}x{height}") with transaction.atomic(): @@ -101,6 +102,7 @@ def calculate_dimensions(self, path, origin, bucket=None, s3_client=None): try: if origin == StorageOrigin.S3.value: response = s3_client.get_object(Bucket=bucket, Key=path) + data = response["Body"].read() with Image.open(io.BytesIO(data)) as image: @@ -109,6 +111,7 @@ def calculate_dimensions(self, path, origin, bucket=None, s3_client=None): with Image.open(path) as image: return image.size except Exception as e: + print(f"Failed to download or read image from S3: {e}") return 0, 0 From beddbe5c47289d1a1b980f500f5028610bdc6baf Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Mon, 11 Aug 2025 20:33:54 -0700 Subject: [PATCH 3/7] add new origin and bucket fields on ROIs --- core/migrations/0009_roi_bucket_roi_origin.py | 23 +++++++++++++++++++ core/models.py | 17 ++++++++++++-- 2 files changed, 38 insertions(+), 2 deletions(-) create mode 100644 core/migrations/0009_roi_bucket_roi_origin.py diff --git a/core/migrations/0009_roi_bucket_roi_origin.py b/core/migrations/0009_roi_bucket_roi_origin.py new file mode 100644 index 0000000..febfe3e --- /dev/null +++ b/core/migrations/0009_roi_bucket_roi_origin.py @@ -0,0 +1,23 @@ +# Generated by Django 5.2.3 on 2025-08-12 03:30 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('core', '0008_roi_winning_annotation'), + ] + + operations = [ + migrations.AddField( + model_name='roi', + name='bucket', + field=models.CharField(blank=True, 
max_length=100, null=True), + ), + migrations.AddField( + model_name='roi', + name='origin', + field=models.CharField(choices=[('LOCAL', 'local'), ('S3', 's3')], default='local', max_length=50), + ), + ] diff --git a/core/models.py b/core/models.py index 1d4170d..c3c5ffc 100644 --- a/core/models.py +++ b/core/models.py @@ -79,15 +79,23 @@ def create_or_update_roi(self, path, collection=None, origin='', bucket=None, s3 with transaction.atomic(): try: roi = self.get(roi_id=roi_id) - if roi.path != path: + if roi.path != path or roi.origin != origin or roi.bucket != bucket: roi.path = path + roi.bucket = bucket + roi.origin = origin roi.save() if collection is not None: if not roi.collections.filter(id=collection.id).exists(): roi.collections.add(collection) except ROI.DoesNotExist: width, height = self.calculate_dimensions(path, origin, bucket, s3_client) - roi = self.create(roi_id=roi_id, width=width, height=height, path=path) + roi = self.create( + roi_id=roi_id, + width=width, + height=height, + path=path, + origin=origin, + bucket=bucket) if collection is not None: roi.collections.add(collection) return roi @@ -122,6 +130,11 @@ class ROI(models.Model): path = models.CharField(max_length=512) winning_annotation = models.ForeignKey('Annotation', on_delete=models.CASCADE, null=True, \ related_name='associated_roi') + bucket = models.CharField(max_length=100, null=True, blank=True) + origin = models.CharField(max_length=50, null=False, blank=False, default=StorageOrigin.LOCAL.value, choices=[ + (StorageOrigin.LOCAL.name, StorageOrigin.LOCAL.value), + (StorageOrigin.S3.name, StorageOrigin.S3.value), + ]) objects = ROIManager() @property From 341c2b3d21e1300b3dd0beba1655055770cacba0 Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Mon, 11 Aug 2025 21:12:16 -0700 Subject: [PATCH 4/7] add file wrapper to download from S3 --- web/urls.py | 1 + web/views.py | 43 ++++++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git 
a/web/urls.py b/web/urls.py index 1a43746..112c745 100644 --- a/web/urls.py +++ b/web/urls.py @@ -12,6 +12,7 @@ path('api/move_or_copy_to_collection', views.move_or_copy_to_collection, name='move_or_copy_to_collection'), path('api/get_labels', views.get_labels, name='get_labels'), path('api/get_collections', views.get_collections, name='get_collections'), + path('api/view_image/<str:bucket>/<path:path>', views.view_image, name='view_image'), # external REST endpoints path('api/winning_annotations/', views.api_winning_annotations, name='winning_annotations'), diff --git a/web/views.py b/web/views.py index 7f0f2ad..69899ef 100644 --- a/web/views.py +++ b/web/views.py @@ -1,3 +1,4 @@ +import os import json from django.contrib.auth.models import User @@ -6,13 +7,18 @@ from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_POST +from constants import StorageOrigin, ALLOWED_FILE_TYPES from core.models import Annotation, Label, ImageCollection, ROI, Annotator from django.core.paginator import Paginator, EmptyPage, PageNotAnInteger +from services.s3_service import S3Service import logging log = logging.getLogger(__name__) +# This is initialized outside of any view methods so it's only created once +s3_client = S3Service.get_client() + def index(request): annotation_users = User.objects.all() collections = ImageCollection.objects.all() @@ -61,7 +67,7 @@ def roi_list(request): rois = rois.order_by(*sortby_query) roi_count = rois.count() - rois_list = rois.values_list('id', 'path') + rois_list = rois.values('id', 'path', 'origin', 'bucket') paginator = Paginator(rois_list, 1000) @@ -72,10 +78,20 @@ def roi_list(request): except EmptyPage: roi_page = paginator.page(paginator.num_pages) - roi_records = [{ - 'id': rid, - 'path': path, - } for rid, path in roi_page] + roi_records = [] + for roi in roi_page: + # S3 based images need to use a custom wrapper + if roi["origin"] == StorageOrigin.S3.value: + bucket = roi["bucket"] + path = roi["path"] + url =
f"/api/view_image/{bucket}/{path}" + else: + url = roi["path"] + + roi_records.append({ + "id": roi["id"], + "path": url, + }) return JsonResponse({ 'rois': roi_records, @@ -237,3 +253,20 @@ def api_winning_annotations(request, collection_name): response['Content-Disposition'] = f'attachment; filename="{collection_name}_annotations.csv"' return response + +def view_image(request, bucket, path): + # Only serve up images with valid extensions + name, ext = os.path.splitext(path) + if ext not in ALLOWED_FILE_TYPES: + return HttpResponseBadRequest() + + mime_type = "image/jpg" if ext == ".jpg" else "image/png" + + try: + response = s3_client.get_object(Bucket=bucket, Key=path) + + data = response["Body"].read() + + return HttpResponse(data, content_type=mime_type) + except: + return HttpResponseBadRequest("Image unavailable") From 6b048b52dc3f10b5d7f872e8b6143123b290a8f6 Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Sun, 24 Aug 2025 13:49:51 -0700 Subject: [PATCH 5/7] allow endpoint urls for AWS config --- core/management/commands/importrois.py | 5 +++-- core/models.py | 7 ------- env.example | 5 +++-- photic/settings.py | 1 + services/s3_service.py | 6 +++++- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/core/management/commands/importrois.py b/core/management/commands/importrois.py index ed1d1c5..b610d3b 100644 --- a/core/management/commands/importrois.py +++ b/core/management/commands/importrois.py @@ -58,8 +58,9 @@ def scan_s3(self, s3_client, bucket, directory): labeled = {} folders = [] - # TODO: The method may need to be "list_objects" instead of "list_objects_v2" due to Vast permissions - paginator = s3_client.get_paginator('list_objects_v2') + # Intentionally using "list_objects" here instead of "list_objects_v2" to work around potential permission or + # feature restrictions when using VAST as the backend storage resource + paginator = s3_client.get_paginator('list_objects') for page in paginator.paginate(Bucket=bucket, 
Delimiter=S3_DELIMITER, Prefix=directory): for cp in page.get("CommonPrefixes", []): diff --git a/core/models.py b/core/models.py index c3c5ffc..ba4bc71 100644 --- a/core/models.py +++ b/core/models.py @@ -69,13 +69,6 @@ def create_or_update_roi(self, path, collection=None, origin='', bucket=None, s3 raise NameError(f'{path} is not the path to a ROI image') roi_id = os.path.basename(path)[:-4] # we know it ends with a 3-character image extension - # TODO: For testing - outside of loop so it always runs - # width, height = self.calculate_dimensions(path, origin, bucket, s3_client) - - # TODO: Remove debugging lines - # print(f"- ROI ID: {roi_id}") - # print(f"- {path} is {width}x{height}") - with transaction.atomic(): try: roi = self.get(roi_id=roi_id) diff --git a/env.example b/env.example index 136362d..0d5eabc 100644 --- a/env.example +++ b/env.example @@ -20,6 +20,7 @@ POSTGRES_DATA_PATH=/srv/postgresql/data SSL_CERT=/etc/ssl/example.crt SSL_KEY=/etc/ssl/example.key -# Credentials for S3/Vast +# Credentials for S3/Vast (Vast requires an endpoint url, AWS does not) S3_ACCESS_KEY= -S3_SECRET_KEY= \ No newline at end of file +S3_SECRET_KEY= +S3_ENDPOINT_URL= \ No newline at end of file diff --git a/photic/settings.py b/photic/settings.py index bed6f04..f9890d5 100644 --- a/photic/settings.py +++ b/photic/settings.py @@ -138,6 +138,7 @@ # S3/Vast credentials S3_ACCESS_KEY = os.environ.get('S3_ACCESS_KEY') S3_SECRET_KEY = os.environ.get('S3_SECRET_KEY') +S3_ENDPOINT_URL = os.environ.get('S3_ENDPOINT_URL') try: from .local_settings import * diff --git a/services/s3_service.py b/services/s3_service.py index 335324b..1a1c7de 100644 --- a/services/s3_service.py +++ b/services/s3_service.py @@ -6,4 +6,8 @@ class S3Service: def get_client(): session = boto3.session.Session() - return session.client("s3", aws_access_key_id=settings.S3_ACCESS_KEY, aws_secret_access_key=settings.S3_SECRET_KEY) \ No newline at end of file + return session.client( + "s3", + 
aws_access_key_id=settings.S3_ACCESS_KEY, + aws_secret_access_key=settings.S3_SECRET_KEY, + endpoint_url=settings.S3_ENDPOINT_URL) \ No newline at end of file From b1afbe2c367be5b6a6f07e3ea0976f2ad3351a9b Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Mon, 29 Sep 2025 14:03:59 -0700 Subject: [PATCH 6/7] add roi filtering --- core/management/commands/importrois.py | 53 ++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 8 deletions(-) diff --git a/core/management/commands/importrois.py b/core/management/commands/importrois.py index b610d3b..8910738 100644 --- a/core/management/commands/importrois.py +++ b/core/management/commands/importrois.py @@ -1,5 +1,6 @@ import os import boto3 +from pathlib import Path from django.core.management.base import BaseCommand, CommandError from django.contrib.auth.models import User @@ -18,11 +19,22 @@ def add_arguments(self, parser): parser.add_argument('-c','--collection', type=str, help='image collection to create or add images to') parser.add_argument('-b', '--bucket', type=str, help='the bucket when importing from S3') parser.add_argument('-u','--user', type=str, help='username for any created annotations (user must exist)') + parser.add_argument('--include-rois', type=str, help='only import rois matching a comma separated list') + parser.add_argument('--include-rois-file', type=str, help='only import rois found in a file (new line separated)') + parser.add_argument('--prefix', type=str, help='a prefix to prepend to all ROIs used with --include-rois or --include-rois-file') origin_choices = [StorageOrigin.LOCAL.value, StorageOrigin.S3.value,] parser.add_argument('-o','--origin', type=str, choices=origin_choices, default='local', help='storage type to use (local or s3)') - def scan_local(self, directory): + def is_roi_included(self, filename, included_rois): + if not included_rois: + return True + + is_included = any(roi in filename for roi in included_rois) + + return is_included + + def scan_local(self, directory, 
included_rois=None): unlabeled = [] labeled = {} folders = [] @@ -30,7 +42,7 @@ def scan_local(self, directory): # First, loop through the directory and sort entries into unlabeled files and top level folders for entry in os.listdir(directory): name, ext = os.path.splitext(entry) - if ext in ALLOWED_FILE_TYPES: + if ext in ALLOWED_FILE_TYPES and self.is_roi_included(name, included_rois): unlabeled.append(entry) continue @@ -46,14 +58,14 @@ def scan_local(self, directory): path = os.path.join(directory, folder) for entry in os.listdir(path): name, ext = os.path.splitext(entry) - if ext not in ALLOWED_FILE_TYPES: + if ext not in ALLOWED_FILE_TYPES or not self.is_roi_included(name, included_rois): continue labeled[folder].append(entry) return unlabeled, labeled - def scan_s3(self, s3_client, bucket, directory): + def scan_s3(self, s3_client, bucket, directory, included_rois=None): unlabeled = [] labeled = {} folders = [] @@ -83,7 +95,7 @@ def scan_s3(self, s3_client, bucket, directory): filename = filename.removeprefix(directory) name, ext = os.path.splitext(filename) - if ext not in ALLOWED_FILE_TYPES: + if ext not in ALLOWED_FILE_TYPES or not self.is_roi_included(name, included_rois): continue unlabeled.append(filename) @@ -107,7 +119,7 @@ def scan_s3(self, s3_client, bucket, directory): filename = filename.removeprefix(directory) name, ext = os.path.splitext(filename) - if ext not in ALLOWED_FILE_TYPES: + if ext not in ALLOWED_FILE_TYPES or not self.is_roi_included(name, included_rois): continue labeled[key].append(filename) @@ -121,10 +133,21 @@ def handle(self, *args, **options): username = options.get('user') origin = options.get('origin') bucket = options.get('bucket') + rois_list = options.get('include_rois') + rois_file = options.get('include_rois_file') + prefix = options.get('prefix') or '' s3_client = S3Service.get_client() if origin == StorageOrigin.S3.value else None # validate arguments + if rois_list and rois_file: + raise CommandError('the 
include-rois and include-rois-file arguments cannot be used at the same time') + + if rois_file: + path = Path(rois_file) + if not path.exists() or not path.is_file(): + raise CommandError(f"file not found: {path}") + # Only verify the path physically exists when using local storage if origin == StorageOrigin.LOCAL.value and not os.path.exists(directory): raise CommandError('specified directory does not exist') @@ -144,6 +167,20 @@ def handle(self, *args, **options): if origin == StorageOrigin.S3.value and directory != "" and not directory.endswith("/"): directory += "/" + # Load any filters + included_rois = [] + + if rois_list: + included_rois = [prefix + item.strip() for item in rois_list.split(',') if item.strip()] + + if rois_file: + path = Path(rois_file) + try: + with path.open('r') as f: + included_rois = [prefix + line.strip() for line in f if line.strip()] + except Exception as e: + raise CommandError(f"could not read file {rois_file}: {e}") + user = None if username: try: @@ -156,9 +193,9 @@ def handle(self, *args, **options): collection, _ = ImageCollection.objects.get_or_create(name=collection_name) if origin == StorageOrigin.S3.value: - unlabeled, labeled = self.scan_s3(s3_client, bucket, directory) + unlabeled, labeled = self.scan_s3(s3_client, bucket, directory, included_rois=included_rois) else: - unlabeled, labeled = self.scan_local(directory) + unlabeled, labeled = self.scan_local(directory, included_rois=included_rois) if len(labeled) > 0 and not user: raise CommandError('labeled ROIs found but no username specified') From 2e34c05a0b2d5ce73419a9eed76799069b2a918d Mon Sep 17 00:00:00 2001 From: Mike Chagnon Date: Fri, 10 Oct 2025 13:10:27 -0700 Subject: [PATCH 7/7] use value for enums --- core/models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/core/models.py b/core/models.py index ba4bc71..cb140d4 100644 --- a/core/models.py +++ b/core/models.py @@ -125,8 +125,8 @@ class ROI(models.Model): 
related_name='associated_roi') bucket = models.CharField(max_length=100, null=True, blank=True) origin = models.CharField(max_length=50, null=False, blank=False, default=StorageOrigin.LOCAL.value, choices=[ - (StorageOrigin.LOCAL.name, StorageOrigin.LOCAL.value), - (StorageOrigin.S3.name, StorageOrigin.S3.value), + (StorageOrigin.LOCAL.value, StorageOrigin.LOCAL.value), + (StorageOrigin.S3.value, StorageOrigin.S3.value), ]) objects = ROIManager()