From 459c7f7f346fe0c31b6a843d5ff706debe243660 Mon Sep 17 00:00:00 2001 From: Pitanga Innovare Date: Mon, 4 May 2026 16:16:23 -0300 Subject: [PATCH 1/2] Polish admin menu and project docs --- .pre-commit-config.yaml | 8 +++--- Makefile | 33 +++++++++++------------ README.md | 53 ++++++++++++++++++++++++++++++------- collection/wagtail_hooks.py | 21 ++++++++++++++- core/wagtail_hooks.py | 16 +++++++++++ production.yml | 6 +---- setup.cfg | 2 +- 7 files changed, 102 insertions(+), 37 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 73bf112..305add9 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,4 +1,4 @@ -exclude: "^docs/|/migrations/" +exclude: "^docs/|/migrations/|.*/migrations/.*" default_stages: [commit] repos: @@ -10,7 +10,7 @@ repos: - id: check-yaml - repo: https://github.com/psf/black - rev: 23.1.0 + rev: 23.12.0 hooks: - id: black @@ -20,11 +20,11 @@ repos: - id: isort - repo: https://github.com/PyCQA/flake8 - rev: 6.0.0 + rev: 6.1.0 hooks: - id: flake8 args: ["--config=setup.cfg"] - additional_dependencies: [flake8-isort] + additional_dependencies: [flake8-isort==6.1.1] # sets up .pre-commit-ci.yaml to ensure pre-commit dependencies stay up to date ci: diff --git a/Makefile b/Makefile index 93ffd33..978625e 100644 --- a/Makefile +++ b/Makefile @@ -4,9 +4,9 @@ COMPOSE_FILE_DEV = local.yml compose = ${COMPOSE_FILE_DEV} -export SCMS_BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") -export SCMS_VCS_REF=$(strip $(shell git rev-parse --short HEAD)) -export SCMS_WEBAPP_VERSION=$(strip $(shell cat VERSION)) +export SCIELO_USAGE_BUILD_DATE=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") +export SCIELO_USAGE_VCS_REF=$(strip $(shell git rev-parse --short HEAD)) +export SCIELO_USAGE_WEBAPP_VERSION=$(strip $(shell cat VERSION)) help: ## Show this help @echo 'Usage: make [target] [argument] ...' 
@@ -23,13 +23,13 @@ help: ## Show this help @echo "\t Type 'make up' is the same of type 'make up compose=local.yml'" app_version: ## Show version of webapp - @echo "Version: " $(SCMS_WEBAPP_VERSION) + @echo "Version: " $(SCIELO_USAGE_WEBAPP_VERSION) latest_commit: ## Show last commit ref - @echo "Latest commit: " $(SCMS_VCS_REF) + @echo "Latest commit: " $(SCIELO_USAGE_VCS_REF) build_date: ## Show build date - @echo "Build date: " $(SCMS_BUILD_DATE) + @echo "Build date: " $(SCIELO_USAGE_BUILD_DATE) ############################################ ## atalhos docker compose desenvolvimento ## @@ -75,10 +75,10 @@ django_bash: ## Open a bash terminar from django container using $(compose) @docker compose -f $(compose) run --rm django bash django_test: ## Run tests from django container using $(compose) - @docker compose -f $(compose) run --rm django python manage.py test + @docker compose -f $(compose) run --rm django pytest django_fast: ## Run tests fast from django container using $(compose) - @docker compose -f $(compose) run --rm django python manage.py test --failfast + @docker compose -f $(compose) run --rm django pytest --exitfirst django_makemigrations: ## Run makemigrations from django container using $(compose) @docker compose -f $(compose) run --rm django python manage.py makemigrations @@ -99,17 +99,17 @@ django_load_auth: ## Run manage.py dumpdata auth --indent=2 $(compose) @docker compose -f $(compose) run --rm django python manage.py loaddata --database=default fixtures/auth.json dump_data: ## Dump database into .sql $(compose) - docker exec -t scielo_core_local_postgres pg_dumpall -c -U debug > dump_`date +%d-%m-%Y"_"%H_%M_%S`.sql + @docker compose -f $(compose) exec -T postgres sh -c 'pg_dumpall -c -U "$$POSTGRES_USER"' > dump_`date +%d-%m-%Y"_"%H_%M_%S`.sql restore_data: ## Restore database into from latest.sql file $(compose) - cat backup/latest.sql | docker exec -i scielo_core_local_postgres psql -U debug + @docker compose -f $(compose) exec -T postgres
sh -c 'psql -U "$$POSTGRES_USER"' < backup/latest.sql ############################################ ## Atalhos Úteis ## ############################################ clean_container: ## Remove all containers - @docker rm $$(docker ps -a -q --no-trunc) + @docker compose -f $(compose) rm -sf clean_dangling_images: ## Remove all dangling images @docker rmi -f $$(docker images --filter 'dangling=true' -q --no-trunc) @@ -117,14 +117,13 @@ clean_dangling_images: ## Remove all dangling images clean_dangling_volumes: ## Remove all dangling volumes @docker volume rm $$(docker volume ls -f dangling=true -q) -clean_project_images: ## Remove all images with "core" on name - @docker rmi -f $$(docker images --filter=reference='*scielo_core*' -q) +clean_project_images: ## Remove all images with "scielo_usage" on name + @docker rmi -f $$(docker images --filter=reference='*scielo_usage*' -q) volume_down: ## Remove all volume @docker compose -f $(compose) down -v -clean_migrations: ## Remove all migrations - @echo "Cleaning migrations..." - @find . -path "*/migrations/*.py" -not -name "__init__.py" -not -path "./django_celery_beat/migrations*" -not -path "./core_settings/migrations*" -not -path "./core/contrib/sites/migrations*" -not -path "./core/users/migrations*" -delete +clean_migrations: ## Remove generated migration bytecode only + @echo "Cleaning migration bytecode..." @find . -path "*/migrations/*.pyc" -delete - @echo "Migrations cleaned successfully." \ No newline at end of file + @echo "Migration bytecode cleaned successfully." diff --git a/README.md b/README.md index 4dd7a22..87734e0 100644 --- a/README.md +++ b/README.md @@ -2,8 +2,6 @@ A modernized platform for processing and indexing SciELO usage logs into OpenSearch, adhering to COUNTER R5.1 standards. 
-**Version**: 2.0.0 - ## Quick Start (Dev Installation) To build and run the application locally: @@ -30,6 +28,10 @@ make django_fast # tests with --failfast make django_migrate # apply migrations make django_makemigrations # generate new migrations make django_createsuperuser # create Wagtail admin user +make logs # follow all service logs +make ps # list compose services +make django_bash # open a bash shell in the django container +make django_compilemessages # compile translation files ``` **Run a single test file/path:** @@ -86,21 +88,48 @@ Metadata is kept in sync with SciELO sources (ArticleMeta, OPAC, Books, etc.) vi ## Environment Variables +Runtime configuration is loaded from `.envs/.local/` or `.envs/.production/` through the Compose files. + +### Core Services + | Variable | Default | Description | |---|---|---| -| `OPENSEARCH_URL` | — | OpenSearch cluster URL | -| `OPENSEARCH_BASIC_AUTH` | — | OpenSearch basic auth credentials (`user:pass`) | +| `OPENSEARCH_URL` | `http://localhost:9200/` | OpenSearch cluster URL | +| `OPENSEARCH_INDEX_NAME` | `usage` | OpenSearch index prefix | +| `OPENSEARCH_BASIC_AUTH` | `admin:admin` | OpenSearch basic auth credentials | | `OPENSEARCH_VERIFY_CERTS` | `False` | Verify SSL certificates for OpenSearch connections | | `USE_LOCAL_SCIELO_LIBS` | `0` | Mount local `scielo_log_validator` and `scielo_usage_counter` repos for development | | `DJANGO_SETTINGS_MODULE` | `config.settings.local` | Django settings module | | `REDIS_URL` | — | Redis connection URL for Celery | -## OpenSearch Storage Strategy (Hybrid Monthly) - -To optimize storage and performance, this system employs a **Hybrid Granularity** approach in OpenSearch: +### Collector Endpoints -- **Monthly Partitioning**: Indices are partitioned by month (e.g., `usage_monthly_books_2026`). -- **One Document per Month**: Each article/PID has exactly **one document per month**, drastically reducing the total document count (up to 30x reduction). 
+| Variable | Default | Description | +|---|---|---| +| `ARTICLEMETA_COLLECT_URL` | `http://articlemeta.scielo.org/api/v1/article/counter_dict` | ArticleMeta counter metadata endpoint | +| `ARTICLEMETA_MAX_RETRIES` | `5` | ArticleMeta retry attempts | +| `ARTICLEMETA_SLEEP_TIME` | `30` | Delay between ArticleMeta retries, in seconds | +| `OPAC_ENDPOINT` | `https://www.scielo.br/api/v1/counter_dict` | OPAC counter metadata endpoint | +| `OPAC_MAX_RETRIES` | `5` | OPAC retry attempts | +| `OPAC_SLEEP_TIME` | `30` | Delay between OPAC retries, in seconds | +| `OAI_PMH_PREPRINT_ENDPOINT` | `https://preprints.scielo.org/index.php/scielo/oai` | SciELO Preprints OAI-PMH endpoint | +| `OAI_METADATA_PREFIX` | `oai_dc` | OAI-PMH metadata prefix | +| `OAI_PMH_MAX_RETRIES` | `5` | OAI-PMH retry attempts | +| `DATAVERSE_ENDPOINT` | `https://data.scielo.org/api` | SciELO Data Dataverse API endpoint | +| `DATAVERSE_ROOT_COLLECTION` | `scielodata` | Dataverse root collection alias | +| `DATAVERSE_SLEEP_TIME` | `30` | Dataverse request timeout/retry delay, in seconds | +| `SCIELO_BOOKS_BASE_URL` | `http://localhost:5984` | SciELO Books CouchDB base URL | +| `SCIELO_BOOKS_DB_NAME` | `scielobooks_1a` | SciELO Books CouchDB database name | +| `SCIELO_BOOKS_TIMEOUT` | `60` | SciELO Books request timeout, in seconds | +| `SCIELO_BOOKS_LIMIT` | `1000` | SciELO Books changes-feed page size | + +## OpenSearch Storage Strategy + +The OpenSearch export keeps monthly usage documents with nested daily metrics, while index names depend on collection size: + +- **Large and xlarge collections**: annual indices, such as `usage_monthly_scl_2024` and `usage_yearly_scl_2024`. +- **Small collections**: stable collection indices, such as `usage_monthly_books` and `usage_yearly_books`. +- **One Document per Month**: Each document/PID has one monthly document per metric scope. - **Daily Nested Metrics**: Daily granularity is preserved inside each monthly document using a `daily_metrics` object. 
- **Atomic Upserts**: Data is merged using OpenSearch **Painless Scripts**, allowing multiple logs for the same day/month to be processed without data duplication or loss. @@ -112,9 +141,15 @@ All pipelines can be monitored through the **Wagtail Admin**: - **Daily Metric Jobs**: Track the history of daily processing and OpenSearch export attempts. - **Log Config**: Manage collection-specific settings, log paths, and notification emails. +Internally, log file statuses are stored as short codes such as `QUE`, `PAR`, and `PRO`, with labels displayed in the admin. + ### Useful Commands - `make django_shell`: Access the Django interactive shell. +- `make django_bash`: Open a bash shell in the Django container. +- `make logs`: Follow Docker Compose logs. +- `make ps`: Show running services. +- `docker compose -f local.yml run --rm django pytest path/to/test_file.py`: Run a single test file or path. - `docker logs -f scielo_usage_local_celeryworker`: Monitor real-time task execution. ## Dependencies diff --git a/collection/wagtail_hooks.py b/collection/wagtail_hooks.py index 52b31a8..018dab8 100644 --- a/collection/wagtail_hooks.py +++ b/collection/wagtail_hooks.py @@ -1,6 +1,10 @@ from django.utils.translation import gettext as _ -from wagtail.snippets.views.snippets import SnippetViewSet +from wagtail.snippets.models import register_snippet +from wagtail.snippets.views.snippets import SnippetViewSet, SnippetViewSetGroup +from config.menu import get_menu_order +from document.wagtail_hooks import DocumentSnippetViewSet +from source.wagtail_hooks import SourceSnippetViewSet from .models import Collection @@ -52,3 +56,18 @@ class CollectionSnippetViewSet(SnippetViewSet): "updated_by", ) export_filename = "collections" + + +class MetadataSnippetViewSetGroup(SnippetViewSetGroup): + menu_name = "metadata" + menu_label = _("Metadata") + menu_icon = "folder-open-inverse" + menu_order = get_menu_order("metadata") + items = ( + CollectionSnippetViewSet, + SourceSnippetViewSet, + 
DocumentSnippetViewSet, + ) + + +register_snippet(MetadataSnippetViewSetGroup) diff --git a/core/wagtail_hooks.py b/core/wagtail_hooks.py index a00ff21..e7da1eb 100644 --- a/core/wagtail_hooks.py +++ b/core/wagtail_hooks.py @@ -5,6 +5,15 @@ from wagtail import hooks +HIDDEN_MAIN_MENU_ITEMS = { + "documents", + "explorer", + "images", + "reports", + "snippets", +} + + @hooks.register("insert_global_admin_css", order=100) def global_admin_css(): """Add /static/css/custom.css to the admin.""" @@ -24,3 +33,10 @@ def global_admin_js(): @hooks.register("construct_homepage_summary_items", order=1) def remove_all_summary_items(request, items): items.clear() + + +@hooks.register("construct_main_menu") +def hide_generic_main_menu_items(request, menu_items): + menu_items[:] = [ + item for item in menu_items if item.name not in HIDDEN_MAIN_MENU_ITEMS + ] diff --git a/production.yml b/production.yml index 8cd684c..cdd4de3 100644 --- a/production.yml +++ b/production.yml @@ -1,5 +1,3 @@ -version: '3' - services: django: &django build: @@ -11,12 +9,10 @@ services: - redis - postgres - mailhog - - solr - pgbouncer links: - pgbouncer - - solr - volumes: + volumes: - .:/app:z - ../scms_data/scielo_usage/data/logs:/data/logs - ../scms_data/scielo_usage/data/supplies:/data/supplies diff --git a/setup.cfg b/setup.cfg index 3017a04..c4ae862 100644 --- a/setup.cfg +++ b/setup.cfg @@ -18,7 +18,7 @@ force_grid_wrap = 0 use_parentheses = true [mypy] -python_version = 3.9 +python_version = 3.11 check_untyped_defs = True ignore_missing_imports = True warn_unused_ignores = True From c3d72d5f3d5065df2d2358dd9d92550ee11b75cd Mon Sep 17 00:00:00 2001 From: Rafael JP Damaceno Date: Mon, 4 May 2026 16:24:07 -0300 Subject: [PATCH 2/2] atualiza VERSION para 2.0.1 --- VERSION | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VERSION b/VERSION index 227cea2..38f77a6 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -2.0.0 +2.0.1