diff --git a/.dockerignore b/.dockerignore index b686b699..83cc027f 100644 --- a/.dockerignore +++ b/.dockerignore @@ -4,7 +4,6 @@ db venv __pycache__ -migrations file_store storage worker_tmp diff --git a/Makefile b/Makefile index 711ae97d..872eb576 100644 --- a/Makefile +++ b/Makefile @@ -1,41 +1,55 @@ -prod: - docker compose up -d +include .env -prod-new: - docker compose up -d --build +DOCKER_IMAGE := dc_app +DB_CONTAINER := iss-main-db +PROD_FILE := -f docker-compose.yml +DEV_FILE := -f docker-compose.dev.yml +TEST_FILE := -f docker-compose.test.yml -prod-stop: - docker compose down +# MAIN +build: + docker compose ${PROD_FILE} build --no-cache -prod-restart: - make prod-stop && make prod +start: + docker compose ${PROD_FILE} up -d -dev: - docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d +stop: + docker compose ${PROD_FILE} down + +start-new: build start +restart: stop start -dev-new: - docker compose -f docker-compose.yml -f docker-compose.dev.yml up -d --build +# DEV +dev: + docker compose ${PROD_FILE} ${DEV_FILE} up -d dev-stop: - docker compose -f docker-compose.yml -f docker-compose.dev.yml down + docker compose ${PROD_FILE} ${DEV_FILE} down -dev-restart: - make dev-stop && make dev +dev-new: build dev +dev-restart: dev-stop dev -test: - docker compose -f docker-compose.test.yml up -d --build +# TEST +test-start: + docker compose ${TEST_FILE} up -d + +test-build: + docker compose ${TEST_FILE} build --no-cache test-stop: - docker compose -f docker-compose.test.yml down + docker compose ${TEST_FILE} down + +test: test-build test-start +test-restart: test-stop test -test-restart: - make test-stop && make test +# UTILS +dump-schema: + docker exec ${DB_CONTAINER} pg_dump -U postgres -d ${DB_APP_DB_NAME} --schema-only > dump_schema -appdb-dump-schema: - docker exec iss-main-db pg_dump -U postgres -d iss_app_db --schema-only > app_dump_schema +dump-data: + docker exec ${DB_CONTAINER} pg_dump -U postgres -d ${DB_APP_DB_NAME} --data-only > 
dump_data -appdb-dump-data: - docker exec iss-main-db pg_dump -U postgres -d iss_app_db --data-only > app_dump_data +dump-all: dump-schema dump-data -dump-database: - make appdb-dump-schema && make appdb-dump-data +init-admin: + docker exec -it iss-back ./manage.py createsuperuser diff --git a/README.md b/README.md index f4f05ba5..e6505aed 100644 --- a/README.md +++ b/README.md @@ -1,94 +1,117 @@ + + # ISS Data Collection Tool -## Makefile convinient commands (**docker/docker-compose** and **.env file** are supposed to be set): - -- start project: - `make prod` -- stop project: - `make prod-stop` -- restart project: - `make prod-restart` -- start project with rebuild: - `make prod-new` -- start development project: - `make dev` -- stop development project: - `make dev-stop` -- restart development project: - `make dev-restart` -- start project with rebuild: - `make dev-new` -- start tests: - `make test` -- stop tests: - `make test-stop` -- restart tests: - `make test-restart` -- dump main apps database schema: - `make appdb-dump-schema` -- dump main apps database data: - `make appdb-dump-data` -- dump main apps database (schema and data separately): - `make dump-database` - -## Prerequisites: - -- **docker/docker-compose** installed -- create and fill **.env** file from sample - -For local development copying sample is enough: -`cp .env.sample .env` - -## Running Application - -Docker Compose file: docker-compose.yml - -Docker files: - -- Dockerfile.backend -- Dockerfile.frontend -- Dockerfile.storage - -Command: -`docker-compose up -d --build` - -## Development - -Docker Compose file: docker-compose.dev.yml - -Docker files: - -- Dockerfile.backend -- Dockerfile.frontend -- Dockerfile.storage - -Command: -`docker-compose -f docker-compose.dev.yml up -d --build` - -## Testing - -Docker Compose file: docker-compose.test.yml - -Docker files: - -- Dockerfile.tests - -Command (rebuild is important): -`docker-compose -f docker-compose.test.yml up -d --build` - -Available 
tests: - -- Main Backend: - `docker exec iss-test-back ./manage.py test` -- Storage Backend: - `docker exec iss-test-storage python3 src/test.py` -- Frontend: - `docker exec iss-test-front npm test` -- Selenium Tests (browser emulation): - `docker exec iss-tests python3 test.py` -- Python linter (no output means the lint test is passed): - `docker exec iss-tests flake8` -- JavaScript linter: - `docker exec iss-test-front npm run lint` -- JavaScript ts compiler checker: - `docker exec iss-test-front npm run compile` +An end-to-end dataset collection system designed for scalability. Supports multi-role workflows, structured label taxonomies, validation cycles, goal tracking, and archive exports. Ideal for organizations building private, high-integrity datasets with distributed teams of data collectors. + +πŸ›  Currently in active development. Ideal for internal use, pilots, and research-stage projects. + +## 🧩 Features + +This platform enables you to: + +- Create projects with custom label systems +- Upload images/videos and assign them to labeling schemas +- validate uploaded files +- Set collection goals +- Track progress with stats +- Export data + +## πŸ“š Documentation & Examples + +See [docs/](/docs) for manuals and walkthroughs: + +- [Quickstart](/docs/quickstart.md) +- [Projects](/docs/projects.md) +- [Labels](/docs/labels.md) +- [Users and Roles](/docs/users.md) +- [Uploads](/docs/uploads.md) +- [Validation](/docs/validation.md) +- [Goals](/docs/goals.md) +- [Statistics](/docs/statistics.md) +- [Downloads](/docs/downloads.md) + +## βš™οΈ Architecture + +- **Main Backend:** Django + PostgreSQL +- **File Backend:** FastAPI + MongoDB (blob storage) +- **Task Queue:** Celery + Redis +- **Frontend:** React +- **Deployment:** Docker, Compose, Makefile-based workflow + +## πŸ“ Folder Structure (Top Level) + +- `backend-app/` β€” main Django app +- `frontend-app/` β€” React app +- `storage-app/` β€” FastAPI blob service +- `scripts/` β€” app handy tools +- 
`tests/` β€” global tests +- `nginx/`, `redis/` β€” infrastructure configs +- `Makefile` β€” common commands +- `docker-compose*.yml` β€” dev/test/prod setup + +## πŸš€ Getting Started + +### Prerequisites + +- Docker + Docker Compose installed +- `.env` file created from `.env.sample` + +```bash +cp .env.sample .env +``` + +### Build & Run +```bash +make build # build all services +make start # start in prod mode +make dev # start in dev mode +``` +Full command list available in the Makefile section below. + +## πŸ§ͺ Testing +Run with: +```bash +docker exec iss-test-back ./manage.py test # Main Backend +docker exec iss-test-storage python3 src/test.py # Storage Backend +docker exec iss-test-front npm test # Frontend +docker exec iss-tests flake8 # Python linter +docker exec iss-test-front npm run lint # JavaScript linter +docker exec iss-test-front npm run compile # JavaScript ts compiler checker +``` + +## πŸ› οΈ Makefile Commands + +General +```bash +make build # build all services +make start # start in prod mode +make stop # stop prod mode +make start-new # rebuild and start services in prod mode +make restart # stop and start in prod mode +``` + +Dev Mode +```bash +make dev # start in dev mode +make dev-stop # stop dev mode +make dev-new # rebuild and start services in dev mode +make dev-restart # stop and start in dev mode +``` + +Tests +```bash +make test # rebuild and start services in test mode +make test-start # start in test mode +make test-build # build test mode +make test-stop # stop test mode +make test-restart # stop and start services in test mode +``` + +Utils +```bash +make dump-schema # dump database schema +make dump-data # dump database data +make dump-all # dump database both schema and data +make init-admin # create new superuser +``` diff --git a/docker-compose.yml b/docker-compose.yml index 58829283..64e7d5ac 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -6,7 +6,7 @@ services: image: postgres:15.3-alpine restart: always 
environment: - POSTGRES_DB: ${DB_APP_DB_NAME:-app_db} + POSTGRES_DB: ${DB_APP_DB_NAME} POSTGRES_HOST_AUTH_METHOD: ${DB_APP_AUTH_METHOD:-trust} volumes: - ./db:/var/lib/postgresql/data @@ -35,7 +35,7 @@ services: restart: always environment: DB_HOST: iss-main-db - DB_NAME: ${DB_APP_DB_NAME:-app_db} + DB_NAME: ${DB_APP_DB_NAME} APP_STORAGE_URL: iss-storage:${STORAGE_PORT:-9000} SERVER_ORIGINS: ${SERVER_ORIGINS:-localhost} SECRET_KEY: ${SECRET_KEY} diff --git a/docs/downloads.md b/docs/downloads.md new file mode 100644 index 00000000..20146588 --- /dev/null +++ b/docs/downloads.md @@ -0,0 +1,48 @@ +# πŸ“₯ Downloads + +This section lets you download collected data for a project. + + + +## πŸŽ›οΈ Filter First + +Use the filter block above to define which files should be included in the export. + + + +Once filters are set, you have two options: + +- `get archive`: + +This sends a request to build a downloadable archive with both the media and annotations. +Since it may include many files, this can take some time. + +- `get annotation`: + +This gives you only annotation dataβ€”no media included. +It includes all annotations matching the current filters. + + + +## πŸ“‹ Archive Table + +Once requested, all archives for the project are listed in a table below. + +Column **requested** shows the exact filters used to generate each archive. + +Click the download button to get the archive. + + + +Once the process completes, the row turns green. This means you can get your archive. + + + +The archive process may fail. +In that case, the row turns red and the **result message** column shows what went wrong. + + + +## βž• Only New Files +When re-downloading later (e.g., the next day), you might want to include only new files. +Enable the `not downloaded before` flag to skip any files already included in past archives (based on the filters). 
diff --git a/docs/goals.md b/docs/goals.md new file mode 100644 index 00000000..d6ebc667 --- /dev/null +++ b/docs/goals.md @@ -0,0 +1,45 @@ +# 🎯 Goals + +This tab lets you define and track collection targets for specific labels. + + + +## βž• Create a Goal +To define a goal + +1. Select a **single label** from any depth of the hierarchy. +2. Set the desired **amount** (how many annotated samples you want). +3. Define media **weights** for: + - πŸ–ΌοΈ Images (e.g., 1) + - 🎞️ Videos (e.g., 2) + + + +A goal is considered fulfilled when the weighted sum of validated data reaches the target: + +Example - Goal = 5, image weight = 1, video weight = 2 +You can complete the goal with 5 images, or 2 videos + 1 image, etc. + +## πŸ“Š Goal Table + +Active goals are listed with: + +- **Label name** +- **Media weights** +- **Completed count** +- **Remaining amount** +- **Items on validation** +- **Validation progress** + +The table is sorted by progress (least done on top) for better prioritization. + + + +Goals are hidden once fulfilled, to keep focus on active targets. +Toggle `show all` to view past goals or completed ones. + +--- + +### βœ… Next Step + +- [Statistics](/docs/statistics.md) diff --git a/docs/labels.md b/docs/labels.md new file mode 100644 index 00000000..068acdce --- /dev/null +++ b/docs/labels.md @@ -0,0 +1,99 @@ +# 🧩 Labels + +Each project uses a label schema to annotate collected data. Labels help ensure even data collection and reduce future class imbalance. These can include: + +- Flat or hierarchical +- Required or optional +- Single or multiple choice + +## πŸ› οΈ Defining a Schema + +When creating a project or later in the Project β†’ Edit tab: + + + +1. Add Tree-like Levels + + + +2. Add Values per level, with optional nesting. Each value is an actual label used during annotation. + + + +3. Set flags like: + - `required` + - `multiple choice` + + + +4. Each label value could have a payload. This is a meta information in valid `json/string` format. 
+You can set the restricted flag, which determines whether a payload is required. +Examples: +- `label payload` +- `{"field1": "value1", "type": 1, "list": ["metalist1", "metalist2"]}` +- `["meta1", "meta2"]` + + + +5. You can do a quick renaming with a special form. + + + +6. You can change the alignment of values. + + + +7. Deleting may be performed only when no media is assigned to the label or level of labels. +When you hit the remove (`minus`) button, a popup will tell you if this item cannot be removed. +If you really want to remove it even when it's restricted, you can change the labeling in the validation tab to remove the association. + + + +8. Grouping +Each block represents a separate attribute tree (e.g., color, shape, type). +To add another label with a different meaning, press `Add attribute` at the top. +I.e., each feature has its own tree/block. + +To remove a group completely you have to delete all the levels. + + + +## 📤 Applying Schema + +When [uploading data](/docs/uploads.md), you assign the schema: +- With a special form that applies the tree to the whole set you uploaded +- Or manually, item by item + +These methods can be used simultaneously. + + + +The label hierarchy defined in the project can be applied several times by clicking the `add object` button. +This will create a new block with its own hierarchy. +This might be useful if your image, for example, has several annotated objects like cars or animals. + + + +Each group can be easily deleted or copied. + + + +## 📝 Validation Stage + +During [validation](/docs/validation.md), you have the same tree you assigned at the Upload stage, with the same management. + + + +## 🔍 Filtering + +On some pages there is an option to use Schema labels as filters. +The usage is almost the same as when you apply labels to the media. +When you are done with the tree, click the `select` button. 
+ + + +--- + +### βœ… Next Step + +[Users and Roles](/docs/users.md) diff --git a/docs/projects.md b/docs/projects.md new file mode 100644 index 00000000..bc9ee3ba --- /dev/null +++ b/docs/projects.md @@ -0,0 +1,135 @@ +# πŸ“ Projects + +Projects are the core unit of organization in the ISS Data Collection Tool. +Each project defines its own label system, goals, and data scope. + + + +## πŸ”Ή What is a Project? + +A project groups: + +- [A labeling taxonomy (flat or hierarchical)](/docs/labels.md) +- [Uploaded images/videos](/docs/uploads.md) +- [Collection goals (per label)](/docs/goals.md) +- [Assigned users with roles](/docs/users.md) + +## πŸ“€ Creating a Project + +Only [admin/superusers](#-roles--access) can create new projects via the UI or API. + +To create a project: +1. Log in as an admin +2. Go to **Projects** section (/projects) +3. Click **create project** +4. Fill out: + - **Name of the project** + - **Project Description** + - **Label Schema** + + + +Next you will be redirected back to list of projects. +Select your newly created one. + + + +Each project has the following sections: +(admin users will see all sections; regular users will only see a subset) + +- **Main info** β†’ `/project/:id` +- [**upload data**](#-uploading) β†’ `/project/:id/upload` +- [**validate data**](#-validation) β†’ `/project/:id/validate` +- [**goals**](#-goals) β†’ `/project/:id/goals` +- [**statistics**](#-stats-progress) β†’ `/project/:id/stats` +- [**download data**](#-downloading-results) β†’ `/project/:id/download` +- [**edit**](#-edit-project) β†’ `/project/:id/edit` + +More of that below + +## πŸ‘₯ Roles & Access + +Users are split by admin/ non admin ones. +Admins have full access to the whole application and items; +Non admin users by default don't have any access to project or its data +so you need to manually edit the permissions for it if needed. 
+ + + +See [Users & Roles](/docs/users.md) for how to manage users and set permissions. + +## 🖼️ Uploading + +After creating a project you can upload images or videos. +After labeling and sending media to the server they will appear in the validation section. +Note: media processing on the server side may cause a slight delay before visibility. + + + +See [Uploads](/docs/uploads.md) for how to upload media. + +## 📝 Validation + +When you are done uploading, you can validate whether the data is applicable to your project or not. +Labels can be corrected there too. + + + +See [Validation](/docs/validation.md) for how to validate and view uploaded images. + +## 🧩 Label Schema + +Each project might have a label schema, defining what’s being collected. Labels can be: + +- Flat list (e.g. `Red`, `Green`, `Blue`) +- Hierarchical (e.g. `Animal > Dog > Labrador`) +- Required +- Multiple items + + + +See [Labels](/docs/labels.md) for how to create and assign them. + +## 🎯 Goals + +Optionally, you can define target counts for labels (e.g. "Need 100 images of `Dog`"). + + + +See [Goals](/docs/goals.md) for how to create and track them. + +## 📊 Stats & Progress + +Each project has a brief dashboard that shows the collected count grouped by: +- Labels +- Media type +- Validation type + + + +See [Statistics](/docs/statistics.md) + +## 🗃 Downloading Results + +You can export annotated datasets as `.zip` archives with optional filters. + + + +See [Downloads](/docs/downloads.md) for how to request archives. + +## ⚙️ Edit project + +You can at any time change project info, labels, permissions or delete the Project. +Deleting a project won't cause any data loss: it is just marked as hidden, so it can be easily restored. + +After you are done editing, click `SUBMIT EDIT`. + +User roles can be set with a dedicated button. 
+ + + +--- + +### ✅ Next Step + +[Labels](/docs/labels.md) diff --git a/docs/quickstart.md b/docs/quickstart.md new file mode 100644 index 00000000..9152cb20 --- /dev/null +++ b/docs/quickstart.md @@ -0,0 +1,43 @@ +# 🚀 Quickstart + +This guide walks you through setting up and using ISS Data Collection Tool locally. + +## 1. Clone the Repository + +```bash +git clone https://github.com/ISSResearch/Data-Collection-Tool.git +cd Data-Collection-Tool +``` + +## 2. Build images + +```bash +cp .env.sample .env +make build +``` + +## 3. Start in Prod Mode + +```bash +make start +``` + +Make sure `make`, Docker, and Docker Compose are installed. +Edit the `.env` file (the Makefile includes it, so it must exist before any `make` command) to suit your environment before proceeding. + +This command starts all services in production mode using Docker Compose. +The project will be accessible at http://localhost:8000 (default port). + +## 4. Create admin user +```bash +make init-admin +``` + +You must create a superuser before accessing the UI. +Only admin users can currently create new projects and manage roles/permissions. +More about that in [Users and Roles](/docs/users.md). + +--- + +### ✅ Next Step +[Projects](/docs/projects.md) diff --git a/docs/statistics.md b/docs/statistics.md new file mode 100644 index 00000000..693e4cdd --- /dev/null +++ b/docs/statistics.md @@ -0,0 +1,46 @@ +# 📊 Stats + +This tab displays how labels are used and how annotations are validated, broken down by media type. + + + +## 🔍 Explore by Attribute or User + +You can group stats by: + +- **Attribute** (label and its nested structure) + + + +- **User** (who uploaded the data) + + + +Click on a `parent row` to expand child items. + +## 📥 Export Options + +Export the table in: + +- `csv` +- `json` +- `xlsx` + + + +Useful for further processing, reporting, or audits. + +## ↔️ Diff table + +The diff view splits data around a reference upload date. 
+ +For example, setting `Diff from`: *2025-01-01* highlights what’s been uploaded since then—helpful for tracking changes over time. +No data is displayed until a reference date is selected. + + + +--- + +### ✅ Next Step + +- [Downloads](/docs/downloads.md) diff --git a/docs/uploads.md b/docs/uploads.md new file mode 100644 index 00000000..c0a9e1fa --- /dev/null +++ b/docs/uploads.md @@ -0,0 +1,68 @@ +# 📤 Uploads + + + +## 🔍 Select Files + +Upload images or videos. The UI shows thumbnails for each selected file. + +You can do that by: + - Dragging and dropping your files + - Clicking the `Add Media` button at the top left corner + + + + +## 🧱 Annotate with Label Trees + +Each image/video can be annotated using a label schema you've defined earlier. + +You can: + +- `Add Object`: Adds another instance of the label tree per image (useful for multiple cars/objects in one image). +- `Delete Group`: Removes that label instance. +- `Copy Group`: Clones an existing label set within the item. + + + +## 🌀 Batch Apply +The block on the left is for applying one label tree to all selected media. + +You define the values. +Hit `apply to all` → auto-fills that tree across items. + +## 🔍 Interactive Media + +Click any image to zoom in. Useful for small objects or cluttered scenes. +This helps especially when annotating dense scenes or small objects. + + + +## ⏳ Uploading + +Once done labeling: +Click `Upload` at the top right corner. +Files get submitted to the server for processing. + + + +You will be redirected to a dedicated page showing the upload state. + + + +Once all media are marked either green or red according to success status, +you are free to leave the page. 
+ + + +Server will: + +- **Check** for duplicates +- **Store** label metadata +- **Delay** validation access until checks complete + +--- + +### βœ… Next Step + +- [Validation](/docs/validation.md) diff --git a/docs/users.md b/docs/users.md new file mode 100644 index 00000000..a38567ea --- /dev/null +++ b/docs/users.md @@ -0,0 +1,41 @@ +# πŸ‘₯ Users & Roles + +To operate the system, at least one user must be created via CLI: + +```bash +make init-admin +``` + +## πŸ”‘ Roles +- **Admin** – full access across the system +- **Collector** / Common User – limited role; permissions are set per project + +## 🧾 User Creation +- Admins: created manually or promoted from a common user +- Self-registered users: have no access until granted project-specific permissions + + + +## Permissions +Permissions are managed per project under: +Project β†’ Edit tab β†’ `USER VISIBILITY` + +There you’ll see a list of users and a cross-table of permissions. +Click `SUBMIT VISIBILITY` to save changes. + + + +Permission types: +- `Can view project` – appears in list and is accessible +- `Can upload` – access to upload and goal tabs +- `Can view files` – access to validation; sees own uploads only +- `Can validate` – full access to validation; can edit labels +- `Can view stats` – access to statistics +- `Can download` – access to download +- `Can edit` – access to edit; can modify project + +--- + +### βœ… Next Step + +[Uploads](/docs/uploads.md) diff --git a/docs/validation.md b/docs/validation.md new file mode 100644 index 00000000..2b228ffc --- /dev/null +++ b/docs/validation.md @@ -0,0 +1,91 @@ +# βœ… Validation + + + +## πŸ“‚ Browse Uploads + +See all uploaded media waiting for review. +Left sidebar lists files – click to load one into view. +Item card contains: +- **Short file id** +- **Upload user** +- **Upload date** + + + +- βœ… Green = **approved** +- ❌ Red = **rejected** +- πŸ”΅ Blue = **untouched** + + + +Use filters at the top to narrow by status, label, date or author. 
+ + + +## 🎯 Review & Confirm Labels + +Main canvas shows the media + label overlays. + + + +Right sidebar holds the full label tree for that file along with file info such as: + +- **File id** +- **Validation user** +- **Validation date** +- **Label tree** +- **Manage buttons** + + + +You can: + +- **View** the media, move it, zoom +- **Edit** labels in-place +- **Add**/remove object groups +- **Accept** (✔) / Reject (✖) +- **Download** item + +## 🖱️ Quick Navigation + +Use these to streamline flow: + +- `← / →` — switch file +- `X` — reset canvas state +- `A` — accept current +- `D` — reject current +- Hover on thumbnail to preview, click ⬇️ to download. + +## 🧭 Done? + +Once all files are green or red you can go to the next page if one exists. +For now, this can only be done manually. + +## 🧬 Duplicate Detection System + +This system improves data integrity, avoids duplication of annotation work, and improves the consistency of image usage throughout the dataset lifecycle. + +The platform includes a **built-in duplicate detection mechanism** to streamline annotation workflows and prevent redundant effort. + +Key Features +- **Automatic Detection**: Upon image upload or validation, the system checks whether the image already exists in the current project. Detected duplicates are flagged immediately. +- **Visual Indicator**: Duplicates are visually marked with a large `DUPLICATE` tag displayed directly on the image view. + + + +- **Best Quality Selection**: When duplicates are found, the system automatically selects the **best quality version** (e.g., higher resolution) as the *primary reference image*. This version will be used upon downloading. +- **Duplication Browser**: A new **"show duplicates"** button has been added, allowing quick access to all known duplicates of a given image, side-by-side. + + + +- **User Decisions**: Annotators can manually resolve duplicates. + +Duplicate checks currently operate **within a single project only**. 
+Detection will be **extended across all projects** in the future. + +--- + +### ✅ Next Step + +- [Goals](/docs/goals.md)