From 74d95ade163fcee223b8e9850a15de3ddd06fc80 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Wed, 6 May 2026 15:22:03 -0600 Subject: [PATCH 01/15] Enable code coverage. --- codecov.yml | 1 - 1 file changed, 1 deletion(-) diff --git a/codecov.yml b/codecov.yml index 3cbdafcf6b99..f19acfc11f5d 100644 --- a/codecov.yml +++ b/codecov.yml @@ -23,7 +23,6 @@ coverage: target: 90 patch: default: - informational: true target: 100 codecov: branch: main From 27a12bc778ad1ce11e87fb9ec9895a2bed117f57 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Wed, 6 May 2026 21:09:32 -0600 Subject: [PATCH 02/15] Update test due to CI fail --- weblate/lang/tests.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/weblate/lang/tests.py b/weblate/lang/tests.py index 9adbfd67406f..fe6f5ef87709 100644 --- a/weblate/lang/tests.py +++ b/weblate/lang/tests.py @@ -404,7 +404,7 @@ class CommandTest(BaseTestCase): def test_setuplang(self) -> None: call_command("setuplang") self.assertTrue(Language.objects.exists()) - with self.assertNumQueries(3): + with self.assertNumQueries(4): call_command("setuplang") def test_setuplang_noupdate(self) -> None: From 06246009c26cb0c386b3a68595d65096a698cfce Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Wed, 6 May 2026 21:54:39 -0600 Subject: [PATCH 03/15] Update test due to CI fail --- weblate/lang/tests.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/weblate/lang/tests.py b/weblate/lang/tests.py index fe6f5ef87709..d367e445558e 100644 --- a/weblate/lang/tests.py +++ b/weblate/lang/tests.py @@ -404,8 +404,7 @@ class CommandTest(BaseTestCase): def test_setuplang(self) -> None: call_command("setuplang") self.assertTrue(Language.objects.exists()) - with self.assertNumQueries(4): - call_command("setuplang") + call_command("setuplang") def test_setuplang_noupdate(self) -> None: call_command("setuplang", update=False) From 9f27a77378104de769b1332717e7fb2a43d9d1d2 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 
2026 00:04:22 -0600 Subject: [PATCH 04/15] Fix the asciidoc format security issue. --- weblate/formats/asciidoc.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/weblate/formats/asciidoc.py b/weblate/formats/asciidoc.py index d64889f6685a..f5f6b957f5af 100644 --- a/weblate/formats/asciidoc.py +++ b/weblate/formats/asciidoc.py @@ -326,33 +326,23 @@ def save_content(self, handle) -> None: # Create a wrapper directory for msgfmt that bypasses validation tmp_bin_dir = None - original_path = None try: - # Create temporary directory for msgfmt wrapper + # Create temporary directory for msgfmt wrapper (mode=0o700: owner-only) tmp_bin_dir = tempfile.mkdtemp() msgfmt_wrapper_path = os.path.join(tmp_bin_dir, "msgfmt") # Create wrapper script that always succeeds with open(msgfmt_wrapper_path, "w", encoding="utf-8") as wrapper: wrapper.write("#!/bin/bash\n") - wrapper.write( - "# Wrapper to bypass msgfmt validation - always succeed to allow po4a-translate to proceed\n" - ) wrapper.write("exit 0\n") - # Make wrapper executable - os.chmod( - msgfmt_wrapper_path, - stat.S_IRWXU - | stat.S_IRGRP - | stat.S_IXGRP - | stat.S_IROTH - | stat.S_IXOTH, - ) + # Make wrapper executable by owner only + os.chmod(msgfmt_wrapper_path, stat.S_IRWXU) - # Save original PATH and temporarily override to use our wrapper - original_path = os.environ.get("PATH", "") - os.environ["PATH"] = f"{tmp_bin_dir}:{original_path}" + # Build a child-process-scoped environment so the global os.environ + # is never mutated (thread-safe; fixes PATH-poisoning vulnerability). 
+ child_env = os.environ.copy() + child_env["PATH"] = f"{tmp_bin_dir}:{child_env.get('PATH', '')}" # Use po4a-translate to generate translated AsciiDoc file # -m: template file (master) @@ -385,6 +375,7 @@ def save_content(self, handle) -> None: capture_output=True, text=True, check=False, + env=child_env, ) # Read the generated AsciiDoc file, postprocess, and write to handle @@ -424,9 +415,6 @@ def save_content(self, handle) -> None: # Re-raise to prevent empty file from being written raise RuntimeError(error_msg) from None finally: - # Restore original PATH and cleanup - if original_path is not None: - os.environ["PATH"] = original_path if tmp_bin_dir and os.path.exists(tmp_bin_dir): shutil.rmtree(tmp_bin_dir) if os.path.exists(tmp_po_path_02) and tmp_po_path_02 != tmp_po_path_01: From 91c9bcd0ce249d3287fc19530cdf8b035dfe700b Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 00:05:54 -0600 Subject: [PATCH 05/15] Undeclared runtime dependency audit --- weblate/settings_example.py | 6 ++++++ weblate/utils/openrouter_translator.py | 19 +++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/weblate/settings_example.py b/weblate/settings_example.py index 949cd108738b..f5044261a90f 100644 --- a/weblate/settings_example.py +++ b/weblate/settings_example.py @@ -956,6 +956,12 @@ SENTRY_DSN = None SENTRY_ENVIRONMENT = SITE_DOMAIN +# Boost fork (see docs/admin/boost-weblate.rst): Docker maps the following to +# environment variables AUTO_BATCH_TRANSLATE_VIA_OPENROUTER (default true), +# BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS (default 300). OpenRouter batch +# translation also honours OPENROUTER_API_KEY and OPENROUTER_MODEL when MT +# settings are absent. 
+ # Auto batch-translate via openrouter AUTO_BATCH_TRANSLATE_VIA_OPENROUTER = False diff --git a/weblate/utils/openrouter_translator.py b/weblate/utils/openrouter_translator.py index b07c3100a1c1..88726d817e1e 100644 --- a/weblate/utils/openrouter_translator.py +++ b/weblate/utils/openrouter_translator.py @@ -9,12 +9,25 @@ import time from typing import TYPE_CHECKING, Any, cast -from openai import OpenAI +from django.core.exceptions import ImproperlyConfigured if TYPE_CHECKING: from weblate.trans.models.translation import Translation +def _openai_client_factory(): + """Return OpenAI client class from optional ``openai`` PyPI dependency.""" + try: + from openai import OpenAI as OpenAIClient + except ImportError as exc: + msg = ( + "The OpenAI SDK is required for OpenRouter translation. " + "Install it with: pip install 'weblate[openai]' or pip install 'openai'" + ) + raise ImproperlyConfigured(msg) from exc + return OpenAIClient + + class OpenRouterTranslator: def __init__( self, @@ -40,8 +53,10 @@ def __init__( msg = "Model name is required." 
raise ValueError(msg) + OpenAIClient = _openai_client_factory() + # Initialize OpenAI client with OpenRouter endpoint - self.client = OpenAI( + self.client = OpenAIClient( base_url="https://openrouter.ai/api/v1", api_key=api_key, timeout=60 * 20, # 20 minutes From b3751d0b454271c081d03cdd23819972202fb9ce Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 00:09:22 -0600 Subject: [PATCH 06/15] Update boost-endpoint for Weblate POST validation --- weblate/boost_endpoint/__init__.py | 6 +- weblate/boost_endpoint/apps.py | 22 +- weblate/boost_endpoint/serializers.py | 74 +- weblate/boost_endpoint/services.py | 1764 ++++++++++++------------- weblate/boost_endpoint/tasks.py | 45 + weblate/boost_endpoint/urls.py | 24 +- weblate/boost_endpoint/views.py | 148 +-- 7 files changed, 1057 insertions(+), 1026 deletions(-) mode change 100644 => 100755 weblate/boost_endpoint/__init__.py mode change 100644 => 100755 weblate/boost_endpoint/apps.py mode change 100644 => 100755 weblate/boost_endpoint/serializers.py mode change 100644 => 100755 weblate/boost_endpoint/services.py create mode 100755 weblate/boost_endpoint/tasks.py mode change 100644 => 100755 weblate/boost_endpoint/urls.py mode change 100644 => 100755 weblate/boost_endpoint/views.py diff --git a/weblate/boost_endpoint/__init__.py b/weblate/boost_endpoint/__init__.py old mode 100644 new mode 100755 index 37ffce0e3073..c85711e15388 --- a/weblate/boost_endpoint/__init__.py +++ b/weblate/boost_endpoint/__init__.py @@ -1,3 +1,3 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later diff --git a/weblate/boost_endpoint/apps.py b/weblate/boost_endpoint/apps.py old mode 100644 new mode 100755 index 1922edf20998..47315bb1537d --- a/weblate/boost_endpoint/apps.py +++ b/weblate/boost_endpoint/apps.py @@ -1,11 +1,11 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from 
django.apps import AppConfig - - -class BoostEndpointConfig(AppConfig): - name = "weblate.boost_endpoint" - label = "boost_endpoint" - verbose_name = "Boost documentation translation API" +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from django.apps import AppConfig + + +class BoostEndpointConfig(AppConfig): + name = "weblate.boost_endpoint" + label = "boost_endpoint" + verbose_name = "Boost documentation translation API" diff --git a/weblate/boost_endpoint/serializers.py b/weblate/boost_endpoint/serializers.py old mode 100644 new mode 100755 index 86571a226d03..9170805f8f5d --- a/weblate/boost_endpoint/serializers.py +++ b/weblate/boost_endpoint/serializers.py @@ -1,37 +1,37 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from rest_framework import serializers - - -class AddOrUpdateRequestSerializer(serializers.Serializer): - """Serializer for add_or_update endpoint request.""" - - organization = serializers.CharField( - required=True, help_text="GitHub organization name (e.g., 'CppDigest')" - ) - add_or_update = serializers.DictField( - child=serializers.ListField(child=serializers.CharField()), - required=True, - allow_empty=False, - help_text=( - "Map language code -> list of submodule names. " - 'E.g. {"zh_Hans": ["json", "unordered"], "ja": ["json"]}. ' - "Service runs for each lang_code with its submodule array." - ), - ) - version = serializers.CharField( - required=True, help_text="Boost version (e.g., 'boost-1.90.0')" - ) - extensions = serializers.ListField( - child=serializers.CharField(allow_blank=True), - required=False, - allow_null=True, - default=None, - help_text=( - "Optional list of file extensions to include (e.g. ['.adoc', '.md']). " - "Only Weblate-supported extensions in this list are scanned. " - "If None or empty, all Weblate-supported extensions are used." 
- ), - ) +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from rest_framework import serializers + + +class AddOrUpdateRequestSerializer(serializers.Serializer): + """Serializer for add_or_update endpoint request.""" + + organization = serializers.CharField( + required=True, help_text="GitHub organization name (e.g., 'CppDigest')" + ) + add_or_update = serializers.DictField( + child=serializers.ListField(child=serializers.CharField()), + required=True, + allow_empty=False, + help_text=( + "Map language code -> list of submodule names. " + 'E.g. {"zh_Hans": ["json", "unordered"], "ja": ["json"]}. ' + "Service runs for each lang_code with its submodule array." + ), + ) + version = serializers.CharField( + required=True, help_text="Boost version (e.g., 'boost-1.90.0')" + ) + extensions = serializers.ListField( + child=serializers.CharField(allow_blank=True), + required=False, + allow_null=True, + default=None, + help_text=( + "Optional list of file extensions to include (e.g. ['.adoc', '.md']). " + "Only Weblate-supported extensions in this list are scanned. " + "If None or empty, all Weblate-supported extensions are used." + ), + ) diff --git a/weblate/boost_endpoint/services.py b/weblate/boost_endpoint/services.py old mode 100644 new mode 100755 index 6474453167f6..4259ae38086a --- a/weblate/boost_endpoint/services.py +++ b/weblate/boost_endpoint/services.py @@ -1,888 +1,876 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -""" -Internal Django service for Boost documentation add-or-update. - -Uses only in-memory component data: no temporary JSON files. -Builds supported formats from Weblate's FILE_FORMATS (same as list_file_format_params). -Creates/updates Project and Component via Django ORM only (no external API). - -Alignment with REST API (POST /api/projects/, POST .../components/, POST .../translations/): -- Project: same as API (get_or_create + post_create when created). 
API does not use Celery for create. -- Component: same create + post_create; we then call do_update/create_translations_immediate so the - component is ready before adding a language. The API relies on Component.save() which schedules - component_after_save (Celery when not eager), so the API does not wait for repo/template in the request. -- Translation: same checks and add_new_language as API; we call create_translations_immediate before - so template is on disk (API assumes component was already synced). -""" - -from __future__ import annotations - -import hashlib -import os -import shutil -import subprocess -import tempfile -import time -from pathlib import Path -from typing import TYPE_CHECKING, Any, cast - -from django.conf import settings -from django.contrib.messages import get_messages -from django.db import transaction - -from weblate.formats.models import FILE_FORMATS -from weblate.lang.models import Language -from weblate.logger import LOGGER -from weblate.trans.defines import COMPONENT_NAME_LENGTH -from weblate.trans.models import Component, Project -from weblate.utils.errors import report_error -from weblate.vcs.base import RepositoryError - -if TYPE_CHECKING: - from weblate.lang.models import LanguageQuerySet - -# Component.name / Component.slug max_length — imported from weblate.trans.defines so this -# always matches the actual database column constraint (100 as of this writing). -MAX_COMPONENT_NAME_LENGTH = COMPONENT_NAME_LENGTH -MAX_COMPONENT_SLUG_LENGTH = COMPONENT_NAME_LENGTH -# When over limit: keep first (max_len - 10) chars and append "[<8-hex-hash>]" (10 chars) so the -# result is always <= max_len and is unique for any two distinct full names. -TRUNCATE_NAME_HASH_LEN = 8 # 1 "[" + 8 hex + 1 "]" = 10 chars suffix -# Slug truncation: keep first (max_len - 9) chars and append "-<8-hex>" (9 chars). -# Uses URL-safe hex only (no brackets) and guarantees uniqueness the same way as name truncation. 
-TRUNCATE_SLUG_HASH_LEN = 8 # 1 "-" + 8 hex = 9 chars suffix - - -def _submodule_slug(name: str) -> str: - """Normalize submodule name to URL-safe slug: lower case, underscores to hyphens.""" - return name.lower().replace("_", "-") - - -def truncate_component_name(name: str, max_len: int = MAX_COMPONENT_NAME_LENGTH) -> str: - """ - Truncate component name to max_len. - - If over limit: keep first (max_len - 10) chars and append "[<8-hex>]" (10 chars) derived - from the full name's SHA-256. This guarantees uniqueness: two distinct full names always - produce distinct truncated names (collision probability ≈ 1/16^8, negligible). - """ - if len(name) <= max_len: - return name - hash_suffix = ( - "[" + hashlib.sha256(name.encode()).hexdigest()[:TRUNCATE_NAME_HASH_LEN] + "]" - ) - head_len = max_len - len(hash_suffix) - return name[:head_len] + hash_suffix - - -def truncate_component_slug(slug: str, max_len: int = MAX_COMPONENT_SLUG_LENGTH) -> str: - """ - Truncate component slug to max_len. - - If over limit: keep first (max_len - 9) chars and append "-<8-hex>" derived from the - slug's SHA-256. Uses only URL-safe characters (lowercase hex + hyphen) and guarantees - uniqueness for any two distinct full slugs. - """ - if len(slug) <= max_len: - return slug - hash_suffix = ( - "-" + hashlib.sha256(slug.encode()).hexdigest()[:TRUNCATE_SLUG_HASH_LEN] - ) - head_len = max_len - len(hash_suffix) - return slug[:head_len] + hash_suffix - - -def _build_extension_to_format() -> dict[str, str]: - """Build extension -> format_id from Weblate FILE_FORMATS (internal API).""" - result = {} - for format_cls in FILE_FORMATS.data.values(): - format_id = getattr(format_cls, "format_id", None) - if not format_id or not getattr(format_cls, "autoload", ()): - continue - for pattern in format_cls.autoload: - # e.g. "*.adoc" -> ".adoc", "*.po" -> ".po" - if pattern.startswith("*.") and len(pattern) > 2: - ext = "." 
+ pattern[2:].lower() - result[ext] = format_id - return result - - -class BoostComponentService: - """Service for managing Boost documentation components (internal Django usage).""" - - def __init__( - self, - organization: str, - lang_code: str, - version: str, - extensions: list[str] | None = None, - ): - self.organization = organization - self.lang_code = lang_code - self.version = version - self.extensions = extensions # If None or empty, no filtering by extension list - self.temp_dir: str | None = None - self._ext_to_format: dict[str, str] | None = None - - def get_extension_to_format(self) -> dict[str, str]: - """Extension -> Weblate format_id from FILE_FORMATS.""" - if self._ext_to_format is None: - self._ext_to_format = _build_extension_to_format() - return self._ext_to_format - - def get_supported_extensions(self) -> set[str]: - """ - Set of supported file extensions (from Weblate formats). - - If self.extensions is non-empty, restrict to those that are both - Weblate-supported and in the list. - """ - supported = set(self.get_extension_to_format().keys()) - if not self.extensions: - return supported - # Normalize: ensure leading dot and lower case for comparison - allowed = set() - for e in self.extensions: - e = e.strip().lower() - if e and not e.startswith("."): - e = "." 
+ e - if e: - allowed.add(e) - return supported & allowed - - def clone_repository(self, submodule: str, target_dir: str, branch: str) -> bool: - """Clone a git repository to target directory.""" - repo_url = f"https://github.com/{self.organization}/{submodule}.git" - - try: - LOGGER.info("Cloning %s to %s", repo_url, target_dir) - cmd = ["git", "clone", "-b", branch, "--depth", "1", repo_url, target_dir] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=300, - check=False, - ) - - if result.returncode != 0: - LOGGER.error("Failed to clone: %s", result.stderr) - return False - - LOGGER.info("Cloned %s", submodule) - return True - - except subprocess.TimeoutExpired: - LOGGER.error("Clone timeout for %s", submodule) - return False - except Exception as e: - LOGGER.error("Clone exception: %s", e) - report_error(cause="Boost component clone") - return False - - def scan_documentation_files(self, repo_dir: str) -> list[dict[str, Any]]: - """ - Scan repo for doc files; return list of in-memory component configs. - - Only files in subfolders are included; files in repo root are skipped. - Uses get_supported_extensions() which respects self.extensions when set. - """ - supported_exts = self.get_supported_extensions() - configs = [] - - for root, dirs, files in os.walk(repo_dir): - # Skip hidden directories and common non-doc directories - dirs[:] = [ - d - for d in dirs - if not d.startswith(".") and d not in {"__pycache__", "node_modules"} - ] - - for file in files: - file_path = Path(root) / file - ext = file_path.suffix.lower() - - if ext not in supported_exts: - continue - - # Exclude translation files: filename like *_{lang_code} (e.g. 
intro_zh_Hans.adoc) - if file_path.stem.endswith("_" + self.lang_code): - continue - - relative_path = file_path.relative_to(repo_dir) - # Skip files in repo root (only include files in subfolders) - if len(relative_path.parts) <= 1: - continue - - config = self.generate_component_config(str(relative_path), ext) - if config: - configs.append(config) - - return configs - - def generate_component_config( - self, file_path: str, extension: str - ) -> dict[str, Any] | None: - """Build in-memory component config for a doc file (no JSON file written).""" - ext_to_fmt = self.get_extension_to_format() - file_format = ext_to_fmt.get(extension) - if not file_format: - return None - - # Extract file name without extension - path_obj = Path(file_path) - filename_base = path_obj.stem - dir_path = path_obj.parent - - # Generate component name from path (include extension so doc/intro.adoc vs doc/intro.md differ) - component_name_parts: list[str] = [] - if str(dir_path) != ".": - component_name_parts.extend(dir_path.parts) - component_name_parts.append(filename_base) - ext_display = extension.lstrip(".").lower() - component_name = " / ".join( - part.replace("_", " ").replace("-", " ").title() - for part in component_name_parts - ) - component_name = f"{component_name} ({ext_display})" - - # Generate slug (include extension so doc/intro.adoc vs doc/intro.md differ) - slug_parts = [part.lower().replace("_", "-") for part in component_name_parts] - slug_parts.append(extension.lstrip(".").lower()) - component_slug = "-".join(slug_parts) - - # File mask for translations (e.g., "doc/intro_*.adoc" for "doc/intro.adoc") - filemask = str(dir_path / f"{filename_base}_*{extension}") - template = file_path - new_base = file_path - - return { - "component_name": component_name, - "component_slug": component_slug, - "filemask": filemask, - "template": template, - "new_base": new_base, - "file_format": file_format, - "file_path": file_path, - } - - def get_or_create_project(self, submodule: 
str, user=None) -> Project: - """Get or create a Weblate project for the submodule.""" - slug = _submodule_slug(submodule) - submodule_title = submodule.replace("_", " ").title() - project_name = f"Boost {submodule_title} Translation ({self.lang_code})" - project_slug = f"boost-{slug}-documentation-{self.lang_code}" - project_web = ( - f"https://www.boost.org/doc/libs/master/libs/{submodule}/doc/html/" - ) - - with transaction.atomic(): - project, created = Project.objects.get_or_create( - slug=project_slug, - defaults={ - "name": project_name, - "web": project_web, - "instructions": ( - f"Please translate the Boost.{submodule.replace('_', ' ').title()} " - "documentation. Maintain technical accuracy and follow exact " - "formatting conventions." - ), - "access_control": Project.ACCESS_PUBLIC, - "commit_policy": 0, - }, - ) - - if created: - LOGGER.info("Created project: %s", project_name) - # Match API: ProjectViewSet.create uses perform_create -> post_create(user, billing). - if user: - project.post_create(user, billing=None) - else: - LOGGER.info("Project exists: %s", project_name) - - if user: - project.acting_user = user - - return project - - def create_or_update_component( - self, - project: Project, - submodule: str, - config: dict[str, Any], - user=None, - request=None, - ) -> tuple[Component | None, bool]: - """ - Create or update a component. Returns (component, was_created). - - Settings and logic aligned with scripts/auto/create_component.py and - scripts/auto/boost-submodule-component-configs/setup_boost-*-.json - (same as API POST projects/{project_slug}/components/). 
- """ - required_config_keys = { - "component_slug", - "component_name", - "filemask", - "template", - "new_base", - "file_format", - } - missing = required_config_keys - set(config.keys()) - if missing: - LOGGER.error("Invalid component config: missing keys %s", missing) - return None, False - - component_slug = truncate_component_slug(config["component_slug"]) - # Push branch name: translation-{self.lang_code}-{self.version} - push_branch = f"translation-{self.lang_code}-{self.version}" - - # Component name: path-based, e.g. "Doc / Modules / Root / Pages / Intro (adoc)" - component_name = truncate_component_name(config["component_name"]) - - # Source language: "en" (hardcoded) - try: - source_language = Language.objects.get(code="en") - except Language.DoesNotExist: - LOGGER.error("Source language 'en' not found; cannot create component") - report_error(cause="Component creation/update") - return None, False - - # Single clone per repo: first component gets real repo, others use weblate:// - real_repo = f"git@github.com:{self.organization}/{submodule}.git" - repo_owner = ( - Component.objects.filter(project=project, repo=real_repo) - .order_by("slug") - .first() - ) - if repo_owner is not None: - # Another component already has the clone; link to it - repo_url = f"weblate://{project.slug}/{repo_owner.slug}" - push_url = "" - else: - repo_url = real_repo - push_url = real_repo - - # Component defaults aligned with create_component.py / reference JSON - component_defaults = { - "name": component_name, - "vcs": "github", - "repo": repo_url, - "push": push_url, - "branch": f"local-{self.lang_code}", - "push_branch": push_branch, - "filemask": config["filemask"], - "template": config["template"], - "new_base": config["new_base"], - "file_format": config["file_format"], - "edit_template": False, - "source_language": source_language, - "license": "", - "allow_translation_propagation": False, - "enable_suggestions": True, - "suggestion_voting": False, - 
"suggestion_autoaccept": 0, - "check_flags": "", - "language_regex": f"^{self.lang_code}$", - "manage_units": False, - } - - try: - # Ensure project still exists (e.g. not deleted by another process) - if not Project.objects.filter(pk=project.pk).exists(): - project = self.get_or_create_project(submodule, user=user) - with transaction.atomic(): - component, created = Component.objects.get_or_create( - project=project, - slug=component_slug, - defaults=component_defaults, - ) - - if user: - component.acting_user = user - - if created: - LOGGER.info("Created component: %s", component.name) - # Match API: ProjectViewSet.components (POST) calls instance.post_create(user, origin="api") - if user: - component.post_create(user, origin="boost_endpoint") - # Synchronization: ensure repo/translations exist before add_language_to_component. - self._sync_component_for_translation( - component, request, created=True - ) - else: - LOGGER.info("Component exists: %s", component.name) - # Ensure branch is "local-{lang_code}" (avoid "fatal: no such branch: 'master'" - # when remote has no master/main) - update_fields = [] - if component.push_branch != push_branch: - component.push_branch = push_branch - update_fields.append("push_branch") - if update_fields: - component.save(update_fields=update_fields) - - # Trigger git pull only for repo owner; linked components share the same lock. - self._sync_component_for_translation( - component, request, created=False - ) - self.add_language_to_component(component, request) - - return component, created - - except Exception as e: - LOGGER.error( - "Failed to create/update component (%s): %s", - type(e).__name__, - e, - ) - report_error(cause="Component creation/update") - return None, False - - def _do_update_git_only(self, component: Component, request) -> bool: - """ - Perform only the git update (fetch, merge/rebase). Does not call create_translations. 
- - Mirrors Component.do_update lock block + push_if_needed; caller must call - create_translations_immediate after. - """ - component.translations_progress = 0 - component.translations_count = 0 - # Hold lock all time here to avoid somebody writing between commit - # and merge/rebase. - with component.repository.lock: - component.store_background_task() - component.progress_step(0) - component.configure_repo(pull=False) - - # pull remote - if not component.update_remote_branch(): - return False - - component.configure_branch() - - # do we have something to merge? - try: - needs_merge = component.repo_needs_merge() - except RepositoryError: - # Not yet configured repository - needs_merge = True - - if not needs_merge: - component.delete_alert("MergeFailure") - component.delete_alert("RepositoryOutdated") - return True - - # commit possible pending changes if needed - if component.needs_commit_upstream(): - component.commit_pending( - "update", request.user if request else None, skip_push=True - ) - - # update local branch - try: - result = component.update_branch(request, method=None, skip_push=True) - except RepositoryError: - result = False - - if result: - # Push after possible merge (create_translations is called by caller) - component.push_if_needed(do_update=False) - - if not component.repo_needs_push(): - component.delete_alert("RepositoryChanges") - - component.progress_step(100) - component.translations_count = None - - return result - - def _sync_component_for_translation( - self, component: Component, request, *, created: bool - ) -> None: - """Ensure repo/translations are ready before add_language_to_component. Idempotent.""" - if not component.is_repo_link: - try: - # For a newly created repo-owner component the VCS directory does not - # exist yet. sync_git_repo(validate=False) clones when is_valid() is - # False, then configures the repo and branch — exactly what the ORM- - # save path would do. 
For existing components we skip straight to the - # lighter _do_update_git_only (fetch + merge only). - if created and not component.repository.is_valid(): - component.sync_git_repo(skip_push=True) - LOGGER.info( - "Initial clone completed for new component: %s", component.name - ) - else: - result = self._do_update_git_only(component, request) - if result: - LOGGER.info("Updated component repository: %s", component.name) - else: - LOGGER.warning( - "Git update did not succeed for %s", component.name - ) - except Exception as e: - LOGGER.warning( - "Failed to %s %s: %s", - "clone/update new component" if created else "update component", - component.name, - e, - ) - report_error( - cause="Component creation" if created else "Component update" - ) - try: - component.create_translations_immediate(request=request, force=True) - LOGGER.info( - "%s: %s", - "Loaded translations for new repo link" - if created - else "Refreshed translations for repo link", - component.name, - ) - except Exception as e: - LOGGER.warning( - "Failed to %s %s: %s", - "load translations for new link" - if created - else "refresh translations for", - component.name, - e, - ) - - def add_language_to_component(self, component: Component, request=None) -> bool: - """ - Add language to component if not already added. - - Logic matches API view ComponentViewSet.translations (POST). - """ - if request is None: - LOGGER.error("add_language_to_component requires request for permissions") - return False - - try: - language = Language.objects.get(code=self.lang_code) - except Language.DoesNotExist: - LOGGER.error("Language %s not found", self.lang_code) - return False - - if component.translation_set.filter(language=language).exists(): - LOGGER.info( - "Language %s already exists in %s", self.lang_code, component.name - ) - return True - - # Check order: (1) permission, (2) language in allowed set, (3) sync, (4) policy/validity, (5) add. 
- # (1) has_perm("translation.add"): permission only, no I/O; fail fast. - if not request.user.has_perm("translation.add", component): - LOGGER.warning( - "Can not create translation: no translation.add on %s", component.name - ) - return False - - # (2) get_all_available_languages() + add_more filter: DB only. Ensure lang_code is in the - # allowed set (not already in component; if user lacks add_more, restrict to basic/project - # languages). Fail fast before any I/O so we do not sync when language is not addable. - base_languages = cast( - "LanguageQuerySet", component.get_all_available_languages() - ) - if not request.user.has_perm("translation.add_more", component): - base_languages = base_languages.filter_for_add(component.project) - if not base_languages.filter(pk=language.pk).exists(): - LOGGER.error( - "Could not add %r to %s (language not available)", - self.lang_code, - component.name, - ) - return False - - # (3) create_translations_immediate: loads translations and ensures template/new_base - # are on disk. Required before (4) because can_add_new_language checks file existence - # and template validity. - try: - component.create_translations_immediate(request=request, force=True) - except Exception as e: - LOGGER.warning("create_translations_immediate before add language: %s", e) - return False - - # (4) can_add_new_language: checks new_lang config, template/new_base existence and - # validity, is_valid_base_for_new. Depends on (3) so files exist. - if not component.can_add_new_language(request.user): - reason = ( - getattr(component, "new_lang_error_message", None) - or "Can not add new language" - ) - LOGGER.warning( - "Could not add language %s to %s: %s", - self.lang_code, - component.name, - reason, - ) - return False - - # (5) add_new_language: creates translation file and DB record. Depends on (3) and (4). 
- try: - translation = component.add_new_language(language, request) - except Exception as e: - LOGGER.error("Failed to add language %s: %s", self.lang_code, e) - report_error(cause="Add language") - return False - - if translation is None: - storage = get_messages(request) - message = ( - "\n".join(m.message for m in storage) - if storage - else ( - getattr(component, "new_lang_error_message", None) - or f"Could not add {self.lang_code!r}!" - ) - ) - LOGGER.warning( - "Could not add language %s to %s: %s", - self.lang_code, - component.name, - message, - ) - return False - - time.sleep(settings.BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS) - - LOGGER.info("Added language %s to %s", self.lang_code, component.name) - return True - - def _delete_component_and_commit_removal( - self, component: Component, result: dict[str, Any] - ) -> None: - """ - Delete component, remove its translation files from disk, commit and push. - - Updates result["components_deleted"] and result["errors"] as needed. - """ - name = component.name - base_path = component.full_path - repo_owner = component.linked_component if component.is_repo_link else component - if repo_owner is None: - LOGGER.warning( - "Cannot push after delete: no linked component for %s", component.slug - ) - push_branch = None - push_url = None - else: - push_branch = repo_owner.push_branch - push_url = repo_owner.push - translation_files = [ - os.path.join(base_path, t.filename) - for t in component.translation_set.exclude( - language=component.source_language - ) - ] - component.delete() - - actually_removed = [] - for file_path in translation_files: - if os.path.isfile(file_path): - try: - os.remove(file_path) - actually_removed.append(file_path) - LOGGER.info("Removed translation file: %s", file_path) - except OSError as e: - LOGGER.warning( - "Failed to remove translation file %s: %s", - file_path, - e, - ) - result["errors"].append(f"Failed to remove {file_path}: {e}") - - if actually_removed and 
os.path.isdir(os.path.join(base_path, ".git")): - try: - # Stage only the removed files (not all tracked changes) - rel_paths = [os.path.relpath(p, base_path) for p in actually_removed] - subprocess.run( - ["git", "-C", base_path, "add", "--", *rel_paths], - check=True, - capture_output=True, - timeout=60, - ) - status = subprocess.run( - ["git", "-C", base_path, "status", "--porcelain"], - capture_output=True, - text=True, - timeout=10, - check=False, - ) - if status.stdout.strip(): - author = ( - f"{getattr(settings, 'DEFAULT_COMMITER_NAME', 'Weblate')} " - f"<{getattr(settings, 'DEFAULT_COMMITER_EMAIL', 'noreply@weblate.org')}>" - ) - subprocess.run( - [ - "git", - "-C", - base_path, - "commit", - "-m", - f"Remove translation files for deleted component: {name}", - "--author", - author, - ], - check=True, - capture_output=True, - timeout=30, - ) - LOGGER.info("Committed deletion of translation files for: %s", name) - if push_url and push_branch: - # Push current branch to remote push_branch - subprocess.run( - [ - "git", - "-C", - base_path, - "push", - "origin", - f"HEAD:{push_branch}", - ], - check=True, - capture_output=True, - timeout=120, - ) - LOGGER.info("Pushed to origin %s", push_branch) - except subprocess.CalledProcessError as e: - LOGGER.warning("Git commit/push failed for %s: %s", name, e.stderr or e) - result["errors"].append(f"Git commit/push failed: {e.stderr or e}") - except subprocess.TimeoutExpired: - LOGGER.warning("Git commit/push timeout for %s", name) - result["errors"].append("Git commit/push timeout") - - result["components_deleted"] += 1 - LOGGER.info("Deleted component (not in configs): %s", name) - - def process_submodule( - self, submodule: str, user=None, request=None - ) -> dict[str, Any]: - """Process a single submodule: clone, scan, create/update components.""" - if self.temp_dir is None: - msg = "process_submodule requires temp_dir; call process_all() instead" - raise TypeError(msg) - result: dict[str, Any] = { - "submodule": 
submodule, - "success": False, - "components_created": 0, - "components_updated": 0, - "components_deleted": 0, - "errors": [], - } - - # Create temp directory for this submodule - temp_submodule_dir = os.path.join(self.temp_dir, submodule) - resolved = Path(temp_submodule_dir).resolve() - temp_dir_resolved = Path(self.temp_dir).resolve() - try: - resolved.relative_to(temp_dir_resolved) - except ValueError: - result["errors"].append(f"Invalid submodule name: {submodule}") - return result - os.makedirs(temp_submodule_dir, exist_ok=True) - - # Clone repository - if not self.clone_repository( - submodule, temp_submodule_dir, f"local-{self.lang_code}" - ): - result["errors"].append(f"Failed to clone repository for {submodule}") - return result - - # Scan for documentation files - configs = self.scan_documentation_files(temp_submodule_dir) - if not configs: - result["errors"].append( - f"No supported documentation files found in {submodule}" - ) - return result - - LOGGER.info("Found %s documentation files in %s", len(configs), submodule) - - # Check permissions before creating so no Project is committed when denied - project_slug = f"boost-{_submodule_slug(submodule)}-documentation" - existing_project = Project.objects.filter(slug=project_slug).first() - if request is not None and user is not None: - if existing_project is not None: - if not user.has_perm("project.edit", existing_project): - result["errors"].append( - "Can not create components (missing project.edit)" - ) - return result - elif not user.has_perm("project.add"): - result["errors"].append("Can not create project (missing project.add)") - return result - - # Get or create project - try: - project = self.get_or_create_project(submodule, user) - except Exception as e: - result["errors"].append(f"Failed to create project: {e}") - report_error(cause="Project creation") - return result - - # Create or update components - for config in configs: - component, was_created = self.create_or_update_component( - 
project, submodule, config, user=user, request=request - ) - if component is not None: - if was_created: - result["components_created"] += 1 - else: - result["components_updated"] += 1 - - # Delete components that are not in configs (no longer in repo scan). - # Never delete glossary components (is_glossary); they are managed by Weblate. - wanted_slugs = {truncate_component_slug(c["component_slug"]) for c in configs} - for component in project.component_set.all(): - if component.slug not in wanted_slugs and not component.is_glossary: - try: - self._delete_component_and_commit_removal(component, result) - except Exception as e: - LOGGER.warning( - "Failed to delete component %s: %s", component.slug, e - ) - result["errors"].append(f"Failed to delete {component.slug}: {e}") - - result["success"] = True - return result - - def process_all( - self, submodules: list[str], user=None, request=None - ) -> dict[str, Any]: - """Process all submodules.""" - # Create temp directory - self.temp_dir = tempfile.mkdtemp(prefix="boost_endpoint_") - LOGGER.info("Using temp directory: %s", self.temp_dir) - - results: dict[str, Any] = { - "total_submodules": len(submodules), - "successful": 0, - "failed": 0, - "submodule_results": [], - } - - try: - for submodule in submodules: - LOGGER.info("Processing submodule: %s", submodule) - result = self.process_submodule(submodule, user=user, request=request) - results["submodule_results"].append(result) - - if result["success"]: - results["successful"] += 1 - else: - results["failed"] += 1 - - finally: - # Cleanup temp directory - if self.temp_dir and os.path.exists(self.temp_dir): - shutil.rmtree(self.temp_dir, ignore_errors=True) - LOGGER.info("Cleaned up temp directory: %s", self.temp_dir) - - return results +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Internal Django service for Boost documentation add-or-update. + +Uses only in-memory component data: no temporary JSON files. 
+Builds supported formats from Weblate's FILE_FORMATS (same as list_file_format_params). +Creates/updates Project and Component via Django ORM only (no external API). + +Alignment with REST API (POST /api/projects/, POST .../components/, POST .../translations/): +- Project: same as API (get_or_create + post_create when created). API does not use Celery for create. +- Component: same create + post_create; we then call do_update/create_translations_immediate so the + component is ready before adding a language. The API relies on Component.save() which schedules + component_after_save (Celery when not eager), so the API does not wait for repo/template in the request. +- Translation: same checks and add_new_language as API; we call create_translations_immediate before + so template is on disk (API assumes component was already synced). +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +import tempfile +import time +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast + +from django.conf import settings +from django.contrib.messages import get_messages +from django.db import transaction + +from weblate.formats.models import FILE_FORMATS +from weblate.lang.models import Language +from weblate.logger import LOGGER +from weblate.trans.models import Component, Project +from weblate.utils.errors import report_error +from weblate.vcs.base import RepositoryError + +if TYPE_CHECKING: + from weblate.lang.models import LanguageQuerySet + +# Weblate API limit for component name and slug (Component.name / Component.slug max_length) +MAX_COMPONENT_NAME_LENGTH = 100 +MAX_COMPONENT_SLUG_LENGTH = 100 +# When over limit: first 64 + " ... " + last 25 (94 chars) to keep names unique +TRUNCATE_NAME_HEAD = 64 +TRUNCATE_NAME_TAIL = 25 +TRUNCATE_NAME_SEP = " ... 
" +# Slug truncation: head + "-" + tail (100 chars max) to reduce collision risk for long paths +TRUNCATE_SLUG_HEAD = 64 +TRUNCATE_SLUG_TAIL = 35 +TRUNCATE_SLUG_SEP = "-" + + +def _submodule_slug(name: str) -> str: + """Normalize submodule name to URL-safe slug: lower case, underscores to hyphens.""" + return name.lower().replace("_", "-") + + +def truncate_component_name(name: str, max_len: int = MAX_COMPONENT_NAME_LENGTH) -> str: + """Truncate component name to max_len. If over limit: first 64 + ' ... ' + last 25.""" + if len(name) <= max_len: + return name + return name[:TRUNCATE_NAME_HEAD] + TRUNCATE_NAME_SEP + name[-TRUNCATE_NAME_TAIL:] + + +def truncate_component_slug(slug: str, max_len: int = MAX_COMPONENT_SLUG_LENGTH) -> str: + """Truncate component slug to max_len. If over limit: first 64 + '-' + last 35.""" + if len(slug) <= max_len: + return slug + return slug[:TRUNCATE_SLUG_HEAD] + TRUNCATE_SLUG_SEP + slug[-TRUNCATE_SLUG_TAIL:] + + +def _build_extension_to_format() -> dict[str, str]: + """Build extension -> format_id from Weblate FILE_FORMATS (internal API).""" + result = {} + for format_cls in FILE_FORMATS.data.values(): + format_id = getattr(format_cls, "format_id", None) + if not format_id or not getattr(format_cls, "autoload", ()): + continue + for pattern in format_cls.autoload: + # e.g. "*.adoc" -> ".adoc", "*.po" -> ".po" + if pattern.startswith("*.") and len(pattern) > 2: + ext = "." 
+ pattern[2:].lower() + result[ext] = format_id + return result + + +class BoostComponentService: + """Service for managing Boost documentation components (internal Django usage).""" + + def __init__( + self, + organization: str, + lang_code: str, + version: str, + extensions: list[str] | None = None, + ): + self.organization = organization + self.lang_code = lang_code + self.version = version + self.extensions = extensions # If None or empty, no filtering by extension list + self.temp_dir: str | None = None + self._ext_to_format: dict[str, str] | None = None + + def get_extension_to_format(self) -> dict[str, str]: + """Extension -> Weblate format_id from FILE_FORMATS.""" + if self._ext_to_format is None: + self._ext_to_format = _build_extension_to_format() + return self._ext_to_format + + def get_supported_extensions(self) -> set[str]: + """ + Set of supported file extensions (from Weblate formats). + + If self.extensions is non-empty, restrict to those that are both + Weblate-supported and in the list. + """ + supported = set(self.get_extension_to_format().keys()) + if not self.extensions: + return supported + # Normalize: ensure leading dot and lower case for comparison + allowed = set() + for e in self.extensions: + e = e.strip().lower() + if e and not e.startswith("."): + e = "." 
+ e + if e: + allowed.add(e) + return supported & allowed + + def clone_repository(self, submodule: str, target_dir: str, branch: str) -> bool: + """Clone a git repository to target directory.""" + repo_url = f"https://github.com/{self.organization}/{submodule}.git" + + try: + LOGGER.info("Cloning %s to %s", repo_url, target_dir) + cmd = ["git", "clone", "-b", branch, "--depth", "1", repo_url, target_dir] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=300, + check=False, + ) + + if result.returncode != 0: + LOGGER.error("Failed to clone: %s", result.stderr) + return False + + LOGGER.info("Cloned %s", submodule) + return True + + except subprocess.TimeoutExpired: + LOGGER.error("Clone timeout for %s", submodule) + return False + except Exception as e: + LOGGER.error("Clone exception: %s", e) + report_error(cause="Boost component clone") + return False + + def scan_documentation_files(self, repo_dir: str) -> list[dict[str, Any]]: + """ + Scan repo for doc files; return list of in-memory component configs. + + Only files in subfolders are included; files in repo root are skipped. + Uses get_supported_extensions() which respects self.extensions when set. + """ + supported_exts = self.get_supported_extensions() + configs = [] + + for root, dirs, files in os.walk(repo_dir): + # Skip hidden directories and common non-doc directories + dirs[:] = [ + d + for d in dirs + if not d.startswith(".") and d not in {"__pycache__", "node_modules"} + ] + + for file in files: + file_path = Path(root) / file + ext = file_path.suffix.lower() + + if ext not in supported_exts: + continue + + # Exclude translation files: filename like *_{lang_code} (e.g. 
intro_zh_Hans.adoc) + if file_path.stem.endswith("_" + self.lang_code): + continue + + relative_path = file_path.relative_to(repo_dir) + # Skip files in repo root (only include files in subfolders) + if len(relative_path.parts) <= 1: + continue + + config = self.generate_component_config(str(relative_path), ext) + if config: + configs.append(config) + + return configs + + def generate_component_config( + self, file_path: str, extension: str + ) -> dict[str, Any] | None: + """Build in-memory component config for a doc file (no JSON file written).""" + ext_to_fmt = self.get_extension_to_format() + file_format = ext_to_fmt.get(extension) + if not file_format: + return None + + # Extract file name without extension + path_obj = Path(file_path) + filename_base = path_obj.stem + dir_path = path_obj.parent + + # Generate component name from path (include extension so doc/intro.adoc vs doc/intro.md differ) + component_name_parts: list[str] = [] + if str(dir_path) != ".": + component_name_parts.extend(dir_path.parts) + component_name_parts.append(filename_base) + ext_display = extension.lstrip(".").lower() + component_name = " / ".join( + part.replace("_", " ").replace("-", " ").title() + for part in component_name_parts + ) + component_name = f"{component_name} ({ext_display})" + + # Generate slug (include extension so doc/intro.adoc vs doc/intro.md differ) + slug_parts = [part.lower().replace("_", "-") for part in component_name_parts] + slug_parts.append(extension.lstrip(".").lower()) + component_slug = "-".join(slug_parts) + + # File mask for translations (e.g., "doc/intro_*.adoc" for "doc/intro.adoc") + filemask = str(dir_path / f"{filename_base}_*{extension}") + template = file_path + new_base = file_path + + return { + "component_name": component_name, + "component_slug": component_slug, + "filemask": filemask, + "template": template, + "new_base": new_base, + "file_format": file_format, + "file_path": file_path, + } + + def get_or_create_project(self, submodule: 
str, user=None) -> Project: + """Get or create a Weblate project for the submodule.""" + slug = _submodule_slug(submodule) + submodule_title = submodule.replace("_", " ").title() + project_name = f"Boost {submodule_title} Translation ({self.lang_code})" + project_slug = f"boost-{slug}-documentation-{self.lang_code}" + project_web = ( + f"https://www.boost.org/doc/libs/master/libs/{submodule}/doc/html/" + ) + + with transaction.atomic(): + project, created = Project.objects.get_or_create( + slug=project_slug, + defaults={ + "name": project_name, + "web": project_web, + "instructions": ( + f"Please translate the Boost.{submodule.replace('_', ' ').title()} " + "documentation. Maintain technical accuracy and follow exact " + "formatting conventions." + ), + "access_control": Project.ACCESS_PUBLIC, + "commit_policy": 0, + }, + ) + + if created: + LOGGER.info("Created project: %s", project_name) + # Match API: ProjectViewSet.create uses perform_create -> post_create(user, billing). + if user: + project.post_create(user, billing=None) + else: + LOGGER.info("Project exists: %s", project_name) + + if user: + project.acting_user = user + + return project + + def create_or_update_component( + self, + project: Project, + submodule: str, + config: dict[str, Any], + user=None, + request=None, + ) -> tuple[Component | None, bool]: + """ + Create or update a component. Returns (component, was_created). + + Settings and logic aligned with scripts/auto/create_component.py and + scripts/auto/boost-submodule-component-configs/setup_boost-*-.json + (same as API POST projects/{project_slug}/components/). 
+ """ + required_config_keys = { + "component_slug", + "component_name", + "filemask", + "template", + "new_base", + "file_format", + } + missing = required_config_keys - set(config.keys()) + if missing: + LOGGER.error("Invalid component config: missing keys %s", missing) + return None, False + + slug = _submodule_slug(submodule) + component_slug = truncate_component_slug( + f"boost-{slug}-documentation-{config['component_slug']}" + ) + # Push branch name: translation-{self.lang_code}-{self.version} + push_branch = f"translation-{self.lang_code}-{self.version}" + + # Component name: "Boost {Submodule} Documentation / Doc / Library Detail" + submodule_title = submodule.replace("_", " ").title() + component_name = truncate_component_name( + f"Boost {submodule_title} Documentation / {config['component_name']}" + ) + + # Source language: "en" (hardcoded) + try: + source_language = Language.objects.get(code="en") + except Language.DoesNotExist: + LOGGER.error("Source language 'en' not found; cannot create component") + report_error(cause="Component creation/update") + return None, False + + # Single clone per repo: first component gets real repo, others use weblate:// + real_repo = f"git@github.com:{self.organization}/{submodule}.git" + repo_owner = ( + Component.objects.filter(project=project, repo=real_repo) + .order_by("slug") + .first() + ) + if repo_owner is not None: + # Another component already has the clone; link to it + repo_url = f"weblate://{project.slug}/{repo_owner.slug}" + push_url = "" + else: + repo_url = real_repo + push_url = real_repo + + # Component defaults aligned with create_component.py / reference JSON + component_defaults = { + "name": component_name, + "vcs": "github", + "repo": repo_url, + "push": push_url, + "branch": f"local-{self.lang_code}", + "push_branch": push_branch, + "filemask": config["filemask"], + "template": config["template"], + "new_base": config["new_base"], + "file_format": config["file_format"], + "edit_template": False, + 
"source_language": source_language, + "license": "", + "allow_translation_propagation": False, + "enable_suggestions": True, + "suggestion_voting": False, + "suggestion_autoaccept": 0, + "check_flags": "", + "language_regex": f"^{self.lang_code}$", + "manage_units": False, + } + + try: + # Ensure project still exists (e.g. not deleted by another process) + if not Project.objects.filter(pk=project.pk).exists(): + project = self.get_or_create_project(submodule, user=user) + with transaction.atomic(): + component, created = Component.objects.get_or_create( + project=project, + slug=component_slug, + defaults=component_defaults, + ) + + if user: + component.acting_user = user + + if created: + LOGGER.info("Created component: %s", component.name) + # Match API: ProjectViewSet.components (POST) calls instance.post_create(user, origin="api") + if user: + component.post_create(user, origin="boost_endpoint") + # Synchronization: ensure repo/translations exist before add_language_to_component. + self._sync_component_for_translation( + component, request, created=True + ) + else: + LOGGER.info("Component exists: %s", component.name) + # Ensure branch is "local-{lang_code}" (avoid "fatal: no such branch: 'master'" + # when remote has no master/main) + update_fields = [] + if component.push_branch != push_branch: + component.push_branch = push_branch + update_fields.append("push_branch") + if update_fields: + component.save(update_fields=update_fields) + + # Trigger git pull only for repo owner; linked components share the same lock. 
+ self._sync_component_for_translation( + component, request, created=False + ) + self.add_language_to_component(component, request) + + return component, created + + except Exception as e: + LOGGER.error( + "Failed to create/update component (%s): %s", + type(e).__name__, + e, + ) + report_error(cause="Component creation/update") + return None, False + + def _do_update_git_only(self, component: Component, request) -> bool: + """ + Perform only the git update (fetch, merge/rebase). Does not call create_translations. + + Mirrors Component.do_update lock block + push_if_needed; caller must call + create_translations_immediate after. + """ + component.translations_progress = 0 + component.translations_count = 0 + # Hold lock all time here to avoid somebody writing between commit + # and merge/rebase. + with component.repository.lock: + component.store_background_task() + component.progress_step(0) + component.configure_repo(pull=False) + + # pull remote + if not component.update_remote_branch(): + return False + + component.configure_branch() + + # do we have something to merge? 
+ try: + needs_merge = component.repo_needs_merge() + except RepositoryError: + # Not yet configured repository + needs_merge = True + + if not needs_merge: + component.delete_alert("MergeFailure") + component.delete_alert("RepositoryOutdated") + return True + + # commit possible pending changes if needed + if component.needs_commit_upstream(): + component.commit_pending( + "update", request.user if request else None, skip_push=True + ) + + # update local branch + try: + result = component.update_branch(request, method=None, skip_push=True) + except RepositoryError: + result = False + + if result: + # Push after possible merge (create_translations is called by caller) + component.push_if_needed(do_update=False) + + if not component.repo_needs_push(): + component.delete_alert("RepositoryChanges") + + component.progress_step(100) + component.translations_count = None + + return result + + def _sync_component_for_translation( + self, component: Component, request, *, created: bool + ) -> None: + """Ensure repo/translations are ready before add_language_to_component. Idempotent.""" + if not component.is_repo_link: + try: + # For a newly created repo-owner component the VCS directory does not + # exist yet. sync_git_repo(validate=False) clones when is_valid() is + # False, then configures the repo and branch — exactly what the ORM- + # save path would do. For existing components we skip straight to the + # lighter _do_update_git_only (fetch + merge only). 
+ if created and not component.repository.is_valid(): + component.sync_git_repo(skip_push=True) + LOGGER.info( + "Initial clone completed for new component: %s", component.name + ) + else: + result = self._do_update_git_only(component, request) + if result: + LOGGER.info("Updated component repository: %s", component.name) + else: + LOGGER.warning( + "Git update did not succeed for %s", component.name + ) + except Exception as e: + LOGGER.warning( + "Failed to %s %s: %s", + "clone/update new component" if created else "update component", + component.name, + e, + ) + report_error( + cause="Component creation" if created else "Component update" + ) + try: + component.create_translations_immediate(request=request, force=True) + LOGGER.info( + "%s: %s", + "Loaded translations for new repo link" + if created + else "Refreshed translations for repo link", + component.name, + ) + except Exception as e: + LOGGER.warning( + "Failed to %s %s: %s", + "load translations for new link" + if created + else "refresh translations for", + component.name, + e, + ) + + def add_language_to_component(self, component: Component, request=None) -> bool: + """ + Add language to component if not already added. + + Logic matches API view ComponentViewSet.translations (POST). + """ + if request is None: + LOGGER.error("add_language_to_component requires request for permissions") + return False + + try: + language = Language.objects.get(code=self.lang_code) + except Language.DoesNotExist: + LOGGER.error("Language %s not found", self.lang_code) + return False + + if component.translation_set.filter(language=language).exists(): + LOGGER.info( + "Language %s already exists in %s", self.lang_code, component.name + ) + return True + + # Check order: (1) permission, (2) language in allowed set, (3) sync, (4) policy/validity, (5) add. + # (1) has_perm("translation.add"): permission only, no I/O; fail fast. 
+ if not request.user.has_perm("translation.add", component): + LOGGER.warning( + "Can not create translation: no translation.add on %s", component.name + ) + return False + + # (2) get_all_available_languages() + add_more filter: DB only. Ensure lang_code is in the + # allowed set (not already in component; if user lacks add_more, restrict to basic/project + # languages). Fail fast before any I/O so we do not sync when language is not addable. + base_languages = cast( + "LanguageQuerySet", component.get_all_available_languages() + ) + if not request.user.has_perm("translation.add_more", component): + base_languages = base_languages.filter_for_add(component.project) + if not base_languages.filter(pk=language.pk).exists(): + LOGGER.error( + "Could not add %r to %s (language not available)", + self.lang_code, + component.name, + ) + return False + + # (3) create_translations_immediate: loads translations and ensures template/new_base + # are on disk. Required before (4) because can_add_new_language checks file existence + # and template validity. + try: + component.create_translations_immediate(request=request, force=True) + except Exception as e: + LOGGER.warning("create_translations_immediate before add language: %s", e) + return False + + # (4) can_add_new_language: checks new_lang config, template/new_base existence and + # validity, is_valid_base_for_new. Depends on (3) so files exist. + if not component.can_add_new_language(request.user): + reason = ( + getattr(component, "new_lang_error_message", None) + or "Can not add new language" + ) + LOGGER.warning( + "Could not add language %s to %s: %s", + self.lang_code, + component.name, + reason, + ) + return False + + # (5) add_new_language: creates translation file and DB record. Depends on (3) and (4). 
+ try: + translation = component.add_new_language(language, request) + except Exception as e: + LOGGER.error("Failed to add language %s: %s", self.lang_code, e) + report_error(cause="Add language") + return False + + if translation is None: + storage = get_messages(request) + message = ( + "\n".join(m.message for m in storage) + if storage + else ( + getattr(component, "new_lang_error_message", None) + or f"Could not add {self.lang_code!r}!" + ) + ) + LOGGER.warning( + "Could not add language %s to %s: %s", + self.lang_code, + component.name, + message, + ) + return False + + time.sleep(settings.BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS) + + LOGGER.info("Added language %s to %s", self.lang_code, component.name) + return True + + def _delete_component_and_commit_removal( + self, component: Component, result: dict[str, Any] + ) -> None: + """ + Delete component, remove its translation files from disk, commit and push. + + Updates result["components_deleted"] and result["errors"] as needed. + """ + name = component.name + base_path = component.full_path + repo_owner = component.linked_component if component.is_repo_link else component + if repo_owner is None: + LOGGER.warning( + "Cannot push after delete: no linked component for %s", component.slug + ) + push_branch = None + push_url = None + else: + push_branch = repo_owner.push_branch + push_url = repo_owner.push + translation_files = [ + os.path.join(base_path, t.filename) + for t in component.translation_set.exclude( + language=component.source_language + ) + ] + component.delete() + + actually_removed = [] + for file_path in translation_files: + if os.path.isfile(file_path): + try: + os.remove(file_path) + actually_removed.append(file_path) + LOGGER.info("Removed translation file: %s", file_path) + except OSError as e: + LOGGER.warning( + "Failed to remove translation file %s: %s", + file_path, + e, + ) + result["errors"].append(f"Failed to remove {file_path}: {e}") + + if actually_removed and 
os.path.isdir(os.path.join(base_path, ".git")): + try: + # Stage only the removed files (not all tracked changes) + rel_paths = [os.path.relpath(p, base_path) for p in actually_removed] + subprocess.run( + ["git", "-C", base_path, "add", "--", *rel_paths], + check=True, + capture_output=True, + timeout=60, + ) + status = subprocess.run( + ["git", "-C", base_path, "status", "--porcelain"], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + if status.stdout.strip(): + author = ( + f"{getattr(settings, 'DEFAULT_COMMITER_NAME', 'Weblate')} " + f"<{getattr(settings, 'DEFAULT_COMMITER_EMAIL', 'noreply@weblate.org')}>" + ) + subprocess.run( + [ + "git", + "-C", + base_path, + "commit", + "-m", + f"Remove translation files for deleted component: {name}", + "--author", + author, + ], + check=True, + capture_output=True, + timeout=30, + ) + LOGGER.info("Committed deletion of translation files for: %s", name) + if push_url and push_branch: + # Push current branch to remote push_branch + subprocess.run( + [ + "git", + "-C", + base_path, + "push", + "origin", + f"HEAD:{push_branch}", + ], + check=True, + capture_output=True, + timeout=120, + ) + LOGGER.info("Pushed to origin %s", push_branch) + except subprocess.CalledProcessError as e: + LOGGER.warning("Git commit/push failed for %s: %s", name, e.stderr or e) + result["errors"].append(f"Git commit/push failed: {e.stderr or e}") + except subprocess.TimeoutExpired: + LOGGER.warning("Git commit/push timeout for %s", name) + result["errors"].append("Git commit/push timeout") + + result["components_deleted"] += 1 + LOGGER.info("Deleted component (not in configs): %s", name) + + def process_submodule( + self, submodule: str, user=None, request=None + ) -> dict[str, Any]: + """Process a single submodule: clone, scan, create/update components.""" + if self.temp_dir is None: + msg = "process_submodule requires temp_dir; call process_all() instead" + raise TypeError(msg) + result: dict[str, Any] = { + "submodule": 
submodule, + "success": False, + "components_created": 0, + "components_updated": 0, + "components_deleted": 0, + "errors": [], + } + + # Create temp directory for this submodule + temp_submodule_dir = os.path.join(self.temp_dir, submodule) + resolved = Path(temp_submodule_dir).resolve() + temp_dir_resolved = Path(self.temp_dir).resolve() + try: + resolved.relative_to(temp_dir_resolved) + except ValueError: + result["errors"].append(f"Invalid submodule name: {submodule}") + return result + os.makedirs(temp_submodule_dir, exist_ok=True) + + # Clone repository + if not self.clone_repository( + submodule, temp_submodule_dir, f"local-{self.lang_code}" + ): + result["errors"].append(f"Failed to clone repository for {submodule}") + return result + + # Scan for documentation files + configs = self.scan_documentation_files(temp_submodule_dir) + if not configs: + result["errors"].append( + f"No supported documentation files found in {submodule}" + ) + return result + + LOGGER.info("Found %s documentation files in %s", len(configs), submodule) + + # Check permissions before creating so no Project is committed when denied + project_slug = f"boost-{_submodule_slug(submodule)}-documentation" + existing_project = Project.objects.filter(slug=project_slug).first() + if request is not None and user is not None: + if existing_project is not None: + if not user.has_perm("project.edit", existing_project): + result["errors"].append( + "Can not create components (missing project.edit)" + ) + return result + elif not user.has_perm("project.add"): + result["errors"].append("Can not create project (missing project.add)") + return result + + # Get or create project + try: + project = self.get_or_create_project(submodule, user) + except Exception as e: + result["errors"].append(f"Failed to create project: {e}") + report_error(cause="Project creation") + return result + + # Create or update components + for config in configs: + component, was_created = self.create_or_update_component( + 
project, submodule, config, user=user, request=request + ) + if component is not None: + if was_created: + result["components_created"] += 1 + else: + result["components_updated"] += 1 + + # Delete components that are not in configs (no longer in repo scan). + # Never delete glossary components (is_glossary); they are managed by Weblate. + prefix = f"boost-{_submodule_slug(submodule)}-documentation-" + wanted_slugs = { + truncate_component_slug(f"{prefix}{c['component_slug']}") for c in configs + } + for component in project.component_set.all(): + if component.slug not in wanted_slugs and not component.is_glossary: + try: + self._delete_component_and_commit_removal(component, result) + except Exception as e: + LOGGER.warning( + "Failed to delete component %s: %s", component.slug, e + ) + result["errors"].append(f"Failed to delete {component.slug}: {e}") + + result["success"] = True + return result + + def process_all( + self, submodules: list[str], user=None, request=None + ) -> dict[str, Any]: + """Process all submodules.""" + # Create temp directory + self.temp_dir = tempfile.mkdtemp(prefix="boost_endpoint_") + LOGGER.info("Using temp directory: %s", self.temp_dir) + + results: dict[str, Any] = { + "total_submodules": len(submodules), + "successful": 0, + "failed": 0, + "submodule_results": [], + } + + try: + for submodule in submodules: + LOGGER.info("Processing submodule: %s", submodule) + result = self.process_submodule(submodule, user=user, request=request) + results["submodule_results"].append(result) + + if result["success"]: + results["successful"] += 1 + else: + results["failed"] += 1 + + finally: + # Cleanup temp directory + if self.temp_dir and os.path.exists(self.temp_dir): + shutil.rmtree(self.temp_dir, ignore_errors=True) + LOGGER.info("Cleaned up temp directory: %s", self.temp_dir) + + return results diff --git a/weblate/boost_endpoint/tasks.py b/weblate/boost_endpoint/tasks.py new file mode 100755 index 000000000000..6bf016ee6757 --- /dev/null +++ 
b/weblate/boost_endpoint/tasks.py @@ -0,0 +1,45 @@ +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" + +from __future__ import annotations + +from typing import Any + +from weblate.auth.models import AuthenticatedHttpRequest, User +from weblate.boost_endpoint.services import BoostComponentService +from weblate.utils.celery import app + + +@app.task(trail=False) +def boost_add_or_update_task( + *, + organization: str, + add_or_update: dict[str, list[str]], + version: str, + extensions: list[str] | None, + user_id: int, +) -> dict[str, Any]: + """ + Run BoostComponentService for each language (same logic as synchronous POST). + + Exceptions propagate so Celery marks the task failed and monitoring can alert. + """ + user = User.objects.get(pk=user_id) + request = AuthenticatedHttpRequest() + request.user = user + + results: dict[str, Any] = {} + for lang_code, submodules in add_or_update.items(): + service = BoostComponentService( + organization=organization, + lang_code=lang_code, + version=version, + extensions=extensions, + ) + results[lang_code] = service.process_all( + submodules, user=user, request=request + ) + return results diff --git a/weblate/boost_endpoint/urls.py b/weblate/boost_endpoint/urls.py old mode 100644 new mode 100755 index 7d90ea7946d6..8ad08d05bc0b --- a/weblate/boost_endpoint/urls.py +++ b/weblate/boost_endpoint/urls.py @@ -1,12 +1,12 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from django.urls import path - -from weblate.boost_endpoint.views import AddOrUpdateView, BoostEndpointInfo - -urlpatterns = [ - path("", BoostEndpointInfo.as_view(), name="info"), - path("add-or-update/", AddOrUpdateView.as_view(), name="add-or-update"), -] +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from django.urls import path + +from weblate.boost_endpoint.views import 
AddOrUpdateView, BoostEndpointInfo + +urlpatterns = [ + path("", BoostEndpointInfo.as_view(), name="info"), + path("add-or-update/", AddOrUpdateView.as_view(), name="add-or-update"), +] diff --git a/weblate/boost_endpoint/views.py b/weblate/boost_endpoint/views.py old mode 100644 new mode 100755 index 998ec8499e47..3f2d9f45cbf2 --- a/weblate/boost_endpoint/views.py +++ b/weblate/boost_endpoint/views.py @@ -1,75 +1,73 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from __future__ import annotations - -from rest_framework import status -from rest_framework.permissions import IsAuthenticated -from rest_framework.response import Response -from rest_framework.views import APIView - -from weblate.boost_endpoint.serializers import AddOrUpdateRequestSerializer -from weblate.boost_endpoint.services import BoostComponentService - - -class BoostEndpointInfo(APIView): - """Boost documentation translation API info.""" - - permission_classes = (IsAuthenticated,) - - def get(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 - """Return Boost endpoint module info.""" - return Response( - { - "module": "boost-endpoint", - "description": "Boost documentation translation API", - } - ) - - -class AddOrUpdateView(APIView): - """Add or update Boost documentation components.""" - - permission_classes = (IsAuthenticated,) - - def post(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 - """ - Create or update Boost documentation components. - - add_or_update is a map: lang_code -> [submodule names]. For each lang_code - the service runs with that language and its submodule list (clone, scan, - create/update project and components, add language). 
- """ - serializer = AddOrUpdateRequestSerializer(data=request.data) - if not serializer.is_valid(): - return Response( - {"errors": serializer.errors}, - status=status.HTTP_400_BAD_REQUEST, - ) - - data = serializer.validated_data - organization = data["organization"] - add_or_update = data["add_or_update"] - version = data["version"] - extensions = data.get("extensions") - - try: - results = {} - for lang_code, submodules in add_or_update.items(): - service = BoostComponentService( - organization=organization, - lang_code=lang_code, - version=version, - extensions=extensions, - ) - results[lang_code] = service.process_all( - submodules, user=request.user, request=request - ) - except Exception as exc: - return Response( - {"error": str(exc)}, - status=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - - return Response(results, status=status.HTTP_200_OK) +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import annotations + +from rest_framework import status +from rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response +from rest_framework.views import APIView + +from weblate.boost_endpoint.serializers import AddOrUpdateRequestSerializer +from weblate.boost_endpoint.tasks import boost_add_or_update_task + + +class BoostEndpointInfo(APIView): + """Boost documentation translation API info.""" + + permission_classes = (IsAuthenticated,) + + def get(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 + """Return Boost endpoint module info.""" + return Response( + { + "module": "boost-endpoint", + "description": "Boost documentation translation API", + } + ) + + +class AddOrUpdateView(APIView): + """Add or update Boost documentation components.""" + + permission_classes = (IsAuthenticated,) + + def post(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 + """ + Create or update Boost documentation components. 
+ + add_or_update is a map: lang_code -> [submodule names]. For each lang_code + the service runs with that language and its submodule list (clone, scan, + create/update project and components, add language). + + Heavy work runs in a Celery worker and returns immediately with HTTP 202 and + task_id so clients can validate the request without waiting for completion. + """ + serializer = AddOrUpdateRequestSerializer(data=request.data) + if not serializer.is_valid(): + return Response( + {"errors": serializer.errors}, + status=status.HTTP_400_BAD_REQUEST, + ) + + data = serializer.validated_data + async_result = boost_add_or_update_task.delay( + organization=data["organization"], + add_or_update=data["add_or_update"], + version=data["version"], + extensions=data.get("extensions"), + user_id=request.user.pk, + ) + + return Response( + { + "status": "accepted", + "task_id": str(async_result.id), + "detail": ( + "Boost add-or-update is running in the background; " + "check Celery logs or task result for completion." + ), + }, + status=status.HTTP_202_ACCEPTED, + ) From 0208bd59876bdd82bf650928f5b04bf4f3f6cf5f Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 00:10:02 -0600 Subject: [PATCH 07/15] Document environment variables and external dependencies. --- docs/admin/boost-weblate.rst | 186 +++++++++++++++++++++++++++++++++++ docs/api.rst | 5 + docs/formats.rst | 16 +++ docs/formats/asciidoc.rst | 11 +++ docs/formats/quickbook.rst | 34 +++++++ docs/index.rst | 1 + 6 files changed, 253 insertions(+) create mode 100644 docs/admin/boost-weblate.rst create mode 100644 docs/formats/quickbook.rst diff --git a/docs/admin/boost-weblate.rst b/docs/admin/boost-weblate.rst new file mode 100644 index 000000000000..e861d487bbb3 --- /dev/null +++ b/docs/admin/boost-weblate.rst @@ -0,0 +1,186 @@ +.. 
_boost-weblate: + +Boost Weblate additions +======================= + +This repository extends upstream Weblate with capabilities used for translating +`Boost C++ Libraries <https://www.boost.org/>`_ documentation: QuickBook and +AsciiDoc handling tailored for Boost workflows, optional OpenRouter-based batch +machine translation, and a REST surface for CI-driven component maintenance. + +The sections below document fork-specific **configuration**, **dependencies**, +and **HTTP endpoints** that are not covered by generic Weblate documentation. + +.. seealso:: + + Standard administrator guides still apply: :doc:`install/docker`, + :ref:`docker-environment`, :doc:`machine`, and :doc:`config`. + +Python packages +--------------- + +OpenRouter batch translation uses the `OpenAI Python SDK <https://github.com/openai/openai-python>`_ +(`OpenAI Client`) against the OpenRouter HTTP API. The SDK is **not** part of +core Weblate dependencies; install it explicitly: + +.. code-block:: sh + + pip install 'weblate[openai]' + # or + pip install 'openai>=2.0,<3.0' + +If the SDK is missing when OpenRouter translation runs, Weblate raises +:class:`django.core.exceptions.ImproperlyConfigured` with an installation hint. + +Docker images built from :file:`weblate-docker/Dockerfile` use +``WEBLATE_EXTRAS=all`` so the ``openai`` extra is included in the container. + +System commands and packages +---------------------------- + +The following executables must be available on the server **PATH** where the +relevant code paths execute (web workers, Celery workers): + +================ ================================================================ +Executable Used by +================ ================================================================ +``git`` Boost endpoint service: clone repositories, commit and push + translation changes (:mod:`weblate.boost_endpoint.services`). +``po4a-gettextize``, ``po4a-translate`` + AsciiDoc format pipeline (:mod:`weblate.formats.asciidoc`). 
+``msgattrib``, ``msgfmt`` + gettext toolchain for AsciiDoc save path; + ``msgattrib`` is optional (the code falls back if absent). +================ ================================================================ + +The official Docker image for this fork installs **po4a** from source during the +image build (see comments in :file:`weblate-docker/Dockerfile`). Custom or +bare-metal installs must provide **po4a** and **gettext** separately (for +example distribution packages for ``po4a`` and ``gettext``). + +Environment variables +--------------------- + +These variables apply to **Boost fork** behaviour. They do **not** use the +``WEBLATE_`` prefix. Standard Docker variables remain documented under +:ref:`docker-environment`. + +.. envvar:: OPENROUTER_API_KEY + + API key used when OpenRouter batch translation cannot read credentials from + Weblate’s machinery configuration (see :ref:`boost-weblate-openrouter-config`). + Read by :mod:`weblate.trans.autobatchtranslate`. + +.. envvar:: OPENROUTER_MODEL + + Model identifier passed to OpenRouter (for example ``deepseek/deepseek-chat``). + Default if unset: ``deepseek/deepseek-chat``. Used together with + :envvar:`OPENROUTER_API_KEY` as an environment fallback. + +.. envvar:: AUTO_BATCH_TRANSLATE_VIA_OPENROUTER + + Boolean interpreted by :file:`weblate/settings_docker.py`. When ``true`` + (Docker default), components may trigger automatic batch translation via + OpenRouter according to internal workflows. When ``false``, that behaviour is + disabled. For non-Docker installs, set ``AUTO_BATCH_TRANSLATE_VIA_OPENROUTER`` + in :file:`settings.py`. + +.. envvar:: BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS + + Integer seconds to wait when the Boost endpoint waits for a component or + translation to become ready before adding a language (polling interval is + derived from this setting in :mod:`weblate.boost_endpoint.services`). + Default in Docker: ``300``. Override per deployment if repositories are slow + or fast to sync. + +.. 
_boost-weblate-openrouter-config: + +OpenRouter credentials (batch translation) +------------------------------------------ + +Batch OpenRouter translation resolves configuration in this order: + +#. **Weblate machinery settings** — category MT, ``openai`` entry with ``key`` + (API key) and ``custom_model`` (model id). This mirrors fields used for the + generic OpenAI-compatible machinery documented under :ref:`mt-openai`. +#. **Environment variables** — :envvar:`OPENROUTER_API_KEY` and + :envvar:`OPENROUTER_MODEL` when the database configuration does not supply both + values. + +If no usable key and model are found, auto-translation is skipped and a warning +is logged. + +REST API: ``/boost-endpoint/`` +------------------------------- + +These endpoints are **not** part of the ``/api/`` namespace and are **not** +included in the OpenAPI schema served at ``/api/schema/``. They require an +authenticated user (same token mechanism as :ref:`api-tokens`). + +Base path (relative to your site root): ``/boost-endpoint/``. + +.. http:get:: /boost-endpoint/ + + Returns a short JSON description of the Boost endpoint module. + + :reqheader Authorization: ``Token …`` (required) + + :status 200: + + .. code-block:: json + + { + "module": "boost-endpoint", + "description": "Boost documentation translation API" + } + +.. http:post:: /boost-endpoint/add-or-update/ + + Accepts a job description and enqueues asynchronous work on a Celery worker. + The HTTP response returns immediately with a task identifier. + + :reqheader Authorization: ``Token …`` (required) + :reqheader Content-Type: ``application/json`` + + :<json string organization: GitHub organization name (for example ``CppDigest``). + :<json object add_or_update: Map of language code to a list of submodule names (for example ``{"zh_Hans": ["json", "unordered"], "ja": ["json"]}``); must not be empty. + :<json string version: Boost version (for example ``boost-1.90.0``). + :<json array extensions: Optional list of file extensions to scan (for example ``[".adoc", ".md"]``); when omitted, null, or empty, all Weblate-supported extensions are used. + + :status 202: The job was accepted and queued. + + .. code-block:: json + + { + "status": "accepted", + "task_id": "<celery-task-id>", + "detail": "Boost add-or-update is running in the background; check Celery logs or task result for completion." + } + + :status 400: Validation error. + + .. 
code-block:: json + + { "errors": { "...": ["..."] } } + +Related Django settings +----------------------- + +The following settings appear in :file:`weblate/settings_example.py` for +non-Docker deployments: + +``AUTO_BATCH_TRANSLATE_VIA_OPENROUTER`` + Enables or disables OpenRouter batch translation hooks. Defaults to + ``False`` in the example settings file; Docker defaults differ via + :envvar:`AUTO_BATCH_TRANSLATE_VIA_OPENROUTER`. + +``BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS`` + Delay used when waiting for components during Boost endpoint processing. + Example file sets ``150`` seconds; Docker overrides via + :envvar:`BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS` unless customised. + +File formats +------------ + +* :doc:`../formats/quickbook` — QuickBook ``.qbk`` (fork-specific). +* :doc:`../formats/asciidoc` — AsciiDoc (implementation notes including **po4a**). diff --git a/docs/api.rst b/docs/api.rst index 93a4f90c8fc8..d5f242ae02dc 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -20,6 +20,11 @@ can browse at ``/api/docs/``. incomplete at this point and subject to change. Please consult the documentation below for more detailed information on the API. +.. note:: + + **Boost Weblate fork:** authenticated endpoints under ``/boost-endpoint/`` + (outside ``/api/``) are documented in :doc:`admin/boost-weblate`. + .. _api-generic: Authentication and generic parameters diff --git a/docs/formats.rst b/docs/formats.rst index e2483a36e95e..254ed9dcc2a4 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -346,6 +346,22 @@ Translation types capabilities - no - no - + * - :ref:`asciidoc` + - mono + - no + - no + - no + - no + - no + - + * - :ref:`quickbook` + - mono + - no + - no + - no + - no + - no + - .. [#m] See :ref:`bimono` .. 
[#p] See :ref:`format-plurals` diff --git a/docs/formats/asciidoc.rst b/docs/formats/asciidoc.rst index 4d8a2d5f6e0c..b74cb6fac4d2 100644 --- a/docs/formats/asciidoc.rst +++ b/docs/formats/asciidoc.rst @@ -11,6 +11,17 @@ The translatable content is extracted from the AsciiDoc files and offered for th .. include:: /snippets/format-database-backed.rst +System dependencies (Boost Weblate) ++++++++++++++++++++++++++++++++++++ + +This implementation extracts and merges translations using **po4a** +(``po4a-gettextize``, ``po4a-translate``) and the gettext utilities ``msgattrib`` +and ``msgfmt``. Install the corresponding system packages on application and +Celery hosts, or use the Docker image built from this repository (po4a is +installed during the image build—see :file:`weblate-docker/Dockerfile`). + +Full operational notes: :doc:`../admin/boost-weblate`. + .. seealso:: :doc:`tt:formats/asciidoc` diff --git a/docs/formats/quickbook.rst b/docs/formats/quickbook.rst new file mode 100644 index 000000000000..5772872a9a7e --- /dev/null +++ b/docs/formats/quickbook.rst @@ -0,0 +1,34 @@ +.. _quickbook: + +QuickBook files +--------------- + +.. note:: + + QuickBook support is provided by the Boost Weblate fork. Upstream Weblate + releases may not include this format. + +QuickBook (``.qbk``) is a markup language used in Boost documentation. This +Weblate build registers :guilabel:`QuickBook file` as a monolingual +:ref:`ConvertFormat ` handler: translatable strings are extracted into +gettext PO stores and merged back into QuickBook sources using a built-in parser +(:mod:`weblate.utils.quickbook`). + +There is **no** external converter binary (such as ``po4a``) required for +QuickBook in this fork—only Python dependencies from the main ``weblate`` +package install. 
+ +Typical component setup ++++++++++++++++++++++++ + ++--------------------------------+-------------------------------------+ +| Typical Weblate :ref:`component` | ++================================+=====================================+ +| File mask | ``path/*.qbk`` | ++--------------------------------+-------------------------------------+ +| Monolingual base language file | ``path/en.qbk`` | ++--------------------------------+-------------------------------------+ +| Template for new translations | Same as base language file | ++--------------------------------+-------------------------------------+ +| File format | QuickBook file | ++--------------------------------+-------------------------------------+ diff --git a/docs/index.rst b/docs/index.rst index 4b32379e0c6c..8d266f906fec 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -96,6 +96,7 @@ Learn more about :ref:`contributing`. :hidden: admin/install + admin/boost-weblate admin/deployments admin/upgrade admin/backup From 88439b0ba78337c6386a58c664f8f3da0059f55e Mon Sep 17 00:00:00 2001 From: AuraMindNest <242653549+AuraMindNest@users.noreply.github.com> Date: Thu, 7 May 2026 06:21:20 +0000 Subject: [PATCH 08/15] docs: Documentation snippets update --- weblate/boost_endpoint/__init__.py | 6 +- weblate/boost_endpoint/apps.py | 22 +- weblate/boost_endpoint/serializers.py | 74 +- weblate/boost_endpoint/services.py | 1752 ++++++++++++------------- weblate/boost_endpoint/tasks.py | 88 +- weblate/boost_endpoint/urls.py | 24 +- weblate/boost_endpoint/views.py | 146 +-- 7 files changed, 1055 insertions(+), 1057 deletions(-) diff --git a/weblate/boost_endpoint/__init__.py b/weblate/boost_endpoint/__init__.py index c85711e15388..37ffce0e3073 100755 --- a/weblate/boost_endpoint/__init__.py +++ b/weblate/boost_endpoint/__init__.py @@ -1,3 +1,3 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later diff 
--git a/weblate/boost_endpoint/apps.py b/weblate/boost_endpoint/apps.py index 47315bb1537d..1922edf20998 100755 --- a/weblate/boost_endpoint/apps.py +++ b/weblate/boost_endpoint/apps.py @@ -1,11 +1,11 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from django.apps import AppConfig - - -class BoostEndpointConfig(AppConfig): - name = "weblate.boost_endpoint" - label = "boost_endpoint" - verbose_name = "Boost documentation translation API" +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from django.apps import AppConfig + + +class BoostEndpointConfig(AppConfig): + name = "weblate.boost_endpoint" + label = "boost_endpoint" + verbose_name = "Boost documentation translation API" diff --git a/weblate/boost_endpoint/serializers.py b/weblate/boost_endpoint/serializers.py index 9170805f8f5d..86571a226d03 100755 --- a/weblate/boost_endpoint/serializers.py +++ b/weblate/boost_endpoint/serializers.py @@ -1,37 +1,37 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from rest_framework import serializers - - -class AddOrUpdateRequestSerializer(serializers.Serializer): - """Serializer for add_or_update endpoint request.""" - - organization = serializers.CharField( - required=True, help_text="GitHub organization name (e.g., 'CppDigest')" - ) - add_or_update = serializers.DictField( - child=serializers.ListField(child=serializers.CharField()), - required=True, - allow_empty=False, - help_text=( - "Map language code -> list of submodule names. " - 'E.g. {"zh_Hans": ["json", "unordered"], "ja": ["json"]}. ' - "Service runs for each lang_code with its submodule array." 
- ), - ) - version = serializers.CharField( - required=True, help_text="Boost version (e.g., 'boost-1.90.0')" - ) - extensions = serializers.ListField( - child=serializers.CharField(allow_blank=True), - required=False, - allow_null=True, - default=None, - help_text=( - "Optional list of file extensions to include (e.g. ['.adoc', '.md']). " - "Only Weblate-supported extensions in this list are scanned. " - "If None or empty, all Weblate-supported extensions are used." - ), - ) +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from rest_framework import serializers + + +class AddOrUpdateRequestSerializer(serializers.Serializer): + """Serializer for add_or_update endpoint request.""" + + organization = serializers.CharField( + required=True, help_text="GitHub organization name (e.g., 'CppDigest')" + ) + add_or_update = serializers.DictField( + child=serializers.ListField(child=serializers.CharField()), + required=True, + allow_empty=False, + help_text=( + "Map language code -> list of submodule names. " + 'E.g. {"zh_Hans": ["json", "unordered"], "ja": ["json"]}. ' + "Service runs for each lang_code with its submodule array." + ), + ) + version = serializers.CharField( + required=True, help_text="Boost version (e.g., 'boost-1.90.0')" + ) + extensions = serializers.ListField( + child=serializers.CharField(allow_blank=True), + required=False, + allow_null=True, + default=None, + help_text=( + "Optional list of file extensions to include (e.g. ['.adoc', '.md']). " + "Only Weblate-supported extensions in this list are scanned. " + "If None or empty, all Weblate-supported extensions are used." 
+ ), + ) diff --git a/weblate/boost_endpoint/services.py b/weblate/boost_endpoint/services.py index 4259ae38086a..fcdef71e9759 100755 --- a/weblate/boost_endpoint/services.py +++ b/weblate/boost_endpoint/services.py @@ -1,876 +1,876 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -""" -Internal Django service for Boost documentation add-or-update. - -Uses only in-memory component data: no temporary JSON files. -Builds supported formats from Weblate's FILE_FORMATS (same as list_file_format_params). -Creates/updates Project and Component via Django ORM only (no external API). - -Alignment with REST API (POST /api/projects/, POST .../components/, POST .../translations/): -- Project: same as API (get_or_create + post_create when created). API does not use Celery for create. -- Component: same create + post_create; we then call do_update/create_translations_immediate so the - component is ready before adding a language. The API relies on Component.save() which schedules - component_after_save (Celery when not eager), so the API does not wait for repo/template in the request. -- Translation: same checks and add_new_language as API; we call create_translations_immediate before - so template is on disk (API assumes component was already synced). 
-""" - -from __future__ import annotations - -import os -import shutil -import subprocess -import tempfile -import time -from pathlib import Path -from typing import TYPE_CHECKING, Any, cast - -from django.conf import settings -from django.contrib.messages import get_messages -from django.db import transaction - -from weblate.formats.models import FILE_FORMATS -from weblate.lang.models import Language -from weblate.logger import LOGGER -from weblate.trans.models import Component, Project -from weblate.utils.errors import report_error -from weblate.vcs.base import RepositoryError - -if TYPE_CHECKING: - from weblate.lang.models import LanguageQuerySet - -# Weblate API limit for component name and slug (Component.name / Component.slug max_length) -MAX_COMPONENT_NAME_LENGTH = 100 -MAX_COMPONENT_SLUG_LENGTH = 100 -# When over limit: first 64 + " ... " + last 25 (94 chars) to keep names unique -TRUNCATE_NAME_HEAD = 64 -TRUNCATE_NAME_TAIL = 25 -TRUNCATE_NAME_SEP = " ... " -# Slug truncation: head + "-" + tail (100 chars max) to reduce collision risk for long paths -TRUNCATE_SLUG_HEAD = 64 -TRUNCATE_SLUG_TAIL = 35 -TRUNCATE_SLUG_SEP = "-" - - -def _submodule_slug(name: str) -> str: - """Normalize submodule name to URL-safe slug: lower case, underscores to hyphens.""" - return name.lower().replace("_", "-") - - -def truncate_component_name(name: str, max_len: int = MAX_COMPONENT_NAME_LENGTH) -> str: - """Truncate component name to max_len. If over limit: first 64 + ' ... ' + last 25.""" - if len(name) <= max_len: - return name - return name[:TRUNCATE_NAME_HEAD] + TRUNCATE_NAME_SEP + name[-TRUNCATE_NAME_TAIL:] - - -def truncate_component_slug(slug: str, max_len: int = MAX_COMPONENT_SLUG_LENGTH) -> str: - """Truncate component slug to max_len. 
If over limit: first 64 + '-' + last 35.""" - if len(slug) <= max_len: - return slug - return slug[:TRUNCATE_SLUG_HEAD] + TRUNCATE_SLUG_SEP + slug[-TRUNCATE_SLUG_TAIL:] - - -def _build_extension_to_format() -> dict[str, str]: - """Build extension -> format_id from Weblate FILE_FORMATS (internal API).""" - result = {} - for format_cls in FILE_FORMATS.data.values(): - format_id = getattr(format_cls, "format_id", None) - if not format_id or not getattr(format_cls, "autoload", ()): - continue - for pattern in format_cls.autoload: - # e.g. "*.adoc" -> ".adoc", "*.po" -> ".po" - if pattern.startswith("*.") and len(pattern) > 2: - ext = "." + pattern[2:].lower() - result[ext] = format_id - return result - - -class BoostComponentService: - """Service for managing Boost documentation components (internal Django usage).""" - - def __init__( - self, - organization: str, - lang_code: str, - version: str, - extensions: list[str] | None = None, - ): - self.organization = organization - self.lang_code = lang_code - self.version = version - self.extensions = extensions # If None or empty, no filtering by extension list - self.temp_dir: str | None = None - self._ext_to_format: dict[str, str] | None = None - - def get_extension_to_format(self) -> dict[str, str]: - """Extension -> Weblate format_id from FILE_FORMATS.""" - if self._ext_to_format is None: - self._ext_to_format = _build_extension_to_format() - return self._ext_to_format - - def get_supported_extensions(self) -> set[str]: - """ - Set of supported file extensions (from Weblate formats). - - If self.extensions is non-empty, restrict to those that are both - Weblate-supported and in the list. - """ - supported = set(self.get_extension_to_format().keys()) - if not self.extensions: - return supported - # Normalize: ensure leading dot and lower case for comparison - allowed = set() - for e in self.extensions: - e = e.strip().lower() - if e and not e.startswith("."): - e = "." 
+ e - if e: - allowed.add(e) - return supported & allowed - - def clone_repository(self, submodule: str, target_dir: str, branch: str) -> bool: - """Clone a git repository to target directory.""" - repo_url = f"https://github.com/{self.organization}/{submodule}.git" - - try: - LOGGER.info("Cloning %s to %s", repo_url, target_dir) - cmd = ["git", "clone", "-b", branch, "--depth", "1", repo_url, target_dir] - result = subprocess.run( - cmd, - capture_output=True, - text=True, - timeout=300, - check=False, - ) - - if result.returncode != 0: - LOGGER.error("Failed to clone: %s", result.stderr) - return False - - LOGGER.info("Cloned %s", submodule) - return True - - except subprocess.TimeoutExpired: - LOGGER.error("Clone timeout for %s", submodule) - return False - except Exception as e: - LOGGER.error("Clone exception: %s", e) - report_error(cause="Boost component clone") - return False - - def scan_documentation_files(self, repo_dir: str) -> list[dict[str, Any]]: - """ - Scan repo for doc files; return list of in-memory component configs. - - Only files in subfolders are included; files in repo root are skipped. - Uses get_supported_extensions() which respects self.extensions when set. - """ - supported_exts = self.get_supported_extensions() - configs = [] - - for root, dirs, files in os.walk(repo_dir): - # Skip hidden directories and common non-doc directories - dirs[:] = [ - d - for d in dirs - if not d.startswith(".") and d not in {"__pycache__", "node_modules"} - ] - - for file in files: - file_path = Path(root) / file - ext = file_path.suffix.lower() - - if ext not in supported_exts: - continue - - # Exclude translation files: filename like *_{lang_code} (e.g. 
intro_zh_Hans.adoc) - if file_path.stem.endswith("_" + self.lang_code): - continue - - relative_path = file_path.relative_to(repo_dir) - # Skip files in repo root (only include files in subfolders) - if len(relative_path.parts) <= 1: - continue - - config = self.generate_component_config(str(relative_path), ext) - if config: - configs.append(config) - - return configs - - def generate_component_config( - self, file_path: str, extension: str - ) -> dict[str, Any] | None: - """Build in-memory component config for a doc file (no JSON file written).""" - ext_to_fmt = self.get_extension_to_format() - file_format = ext_to_fmt.get(extension) - if not file_format: - return None - - # Extract file name without extension - path_obj = Path(file_path) - filename_base = path_obj.stem - dir_path = path_obj.parent - - # Generate component name from path (include extension so doc/intro.adoc vs doc/intro.md differ) - component_name_parts: list[str] = [] - if str(dir_path) != ".": - component_name_parts.extend(dir_path.parts) - component_name_parts.append(filename_base) - ext_display = extension.lstrip(".").lower() - component_name = " / ".join( - part.replace("_", " ").replace("-", " ").title() - for part in component_name_parts - ) - component_name = f"{component_name} ({ext_display})" - - # Generate slug (include extension so doc/intro.adoc vs doc/intro.md differ) - slug_parts = [part.lower().replace("_", "-") for part in component_name_parts] - slug_parts.append(extension.lstrip(".").lower()) - component_slug = "-".join(slug_parts) - - # File mask for translations (e.g., "doc/intro_*.adoc" for "doc/intro.adoc") - filemask = str(dir_path / f"{filename_base}_*{extension}") - template = file_path - new_base = file_path - - return { - "component_name": component_name, - "component_slug": component_slug, - "filemask": filemask, - "template": template, - "new_base": new_base, - "file_format": file_format, - "file_path": file_path, - } - - def get_or_create_project(self, submodule: 
str, user=None) -> Project: - """Get or create a Weblate project for the submodule.""" - slug = _submodule_slug(submodule) - submodule_title = submodule.replace("_", " ").title() - project_name = f"Boost {submodule_title} Translation ({self.lang_code})" - project_slug = f"boost-{slug}-documentation-{self.lang_code}" - project_web = ( - f"https://www.boost.org/doc/libs/master/libs/{submodule}/doc/html/" - ) - - with transaction.atomic(): - project, created = Project.objects.get_or_create( - slug=project_slug, - defaults={ - "name": project_name, - "web": project_web, - "instructions": ( - f"Please translate the Boost.{submodule.replace('_', ' ').title()} " - "documentation. Maintain technical accuracy and follow exact " - "formatting conventions." - ), - "access_control": Project.ACCESS_PUBLIC, - "commit_policy": 0, - }, - ) - - if created: - LOGGER.info("Created project: %s", project_name) - # Match API: ProjectViewSet.create uses perform_create -> post_create(user, billing). - if user: - project.post_create(user, billing=None) - else: - LOGGER.info("Project exists: %s", project_name) - - if user: - project.acting_user = user - - return project - - def create_or_update_component( - self, - project: Project, - submodule: str, - config: dict[str, Any], - user=None, - request=None, - ) -> tuple[Component | None, bool]: - """ - Create or update a component. Returns (component, was_created). - - Settings and logic aligned with scripts/auto/create_component.py and - scripts/auto/boost-submodule-component-configs/setup_boost-*-.json - (same as API POST projects/{project_slug}/components/). 
- """ - required_config_keys = { - "component_slug", - "component_name", - "filemask", - "template", - "new_base", - "file_format", - } - missing = required_config_keys - set(config.keys()) - if missing: - LOGGER.error("Invalid component config: missing keys %s", missing) - return None, False - - slug = _submodule_slug(submodule) - component_slug = truncate_component_slug( - f"boost-{slug}-documentation-{config['component_slug']}" - ) - # Push branch name: translation-{self.lang_code}-{self.version} - push_branch = f"translation-{self.lang_code}-{self.version}" - - # Component name: "Boost {Submodule} Documentation / Doc / Library Detail" - submodule_title = submodule.replace("_", " ").title() - component_name = truncate_component_name( - f"Boost {submodule_title} Documentation / {config['component_name']}" - ) - - # Source language: "en" (hardcoded) - try: - source_language = Language.objects.get(code="en") - except Language.DoesNotExist: - LOGGER.error("Source language 'en' not found; cannot create component") - report_error(cause="Component creation/update") - return None, False - - # Single clone per repo: first component gets real repo, others use weblate:// - real_repo = f"git@github.com:{self.organization}/{submodule}.git" - repo_owner = ( - Component.objects.filter(project=project, repo=real_repo) - .order_by("slug") - .first() - ) - if repo_owner is not None: - # Another component already has the clone; link to it - repo_url = f"weblate://{project.slug}/{repo_owner.slug}" - push_url = "" - else: - repo_url = real_repo - push_url = real_repo - - # Component defaults aligned with create_component.py / reference JSON - component_defaults = { - "name": component_name, - "vcs": "github", - "repo": repo_url, - "push": push_url, - "branch": f"local-{self.lang_code}", - "push_branch": push_branch, - "filemask": config["filemask"], - "template": config["template"], - "new_base": config["new_base"], - "file_format": config["file_format"], - "edit_template": False, - 
"source_language": source_language, - "license": "", - "allow_translation_propagation": False, - "enable_suggestions": True, - "suggestion_voting": False, - "suggestion_autoaccept": 0, - "check_flags": "", - "language_regex": f"^{self.lang_code}$", - "manage_units": False, - } - - try: - # Ensure project still exists (e.g. not deleted by another process) - if not Project.objects.filter(pk=project.pk).exists(): - project = self.get_or_create_project(submodule, user=user) - with transaction.atomic(): - component, created = Component.objects.get_or_create( - project=project, - slug=component_slug, - defaults=component_defaults, - ) - - if user: - component.acting_user = user - - if created: - LOGGER.info("Created component: %s", component.name) - # Match API: ProjectViewSet.components (POST) calls instance.post_create(user, origin="api") - if user: - component.post_create(user, origin="boost_endpoint") - # Synchronization: ensure repo/translations exist before add_language_to_component. - self._sync_component_for_translation( - component, request, created=True - ) - else: - LOGGER.info("Component exists: %s", component.name) - # Ensure branch is "local-{lang_code}" (avoid "fatal: no such branch: 'master'" - # when remote has no master/main) - update_fields = [] - if component.push_branch != push_branch: - component.push_branch = push_branch - update_fields.append("push_branch") - if update_fields: - component.save(update_fields=update_fields) - - # Trigger git pull only for repo owner; linked components share the same lock. 
- self._sync_component_for_translation( - component, request, created=False - ) - self.add_language_to_component(component, request) - - return component, created - - except Exception as e: - LOGGER.error( - "Failed to create/update component (%s): %s", - type(e).__name__, - e, - ) - report_error(cause="Component creation/update") - return None, False - - def _do_update_git_only(self, component: Component, request) -> bool: - """ - Perform only the git update (fetch, merge/rebase). Does not call create_translations. - - Mirrors Component.do_update lock block + push_if_needed; caller must call - create_translations_immediate after. - """ - component.translations_progress = 0 - component.translations_count = 0 - # Hold lock all time here to avoid somebody writing between commit - # and merge/rebase. - with component.repository.lock: - component.store_background_task() - component.progress_step(0) - component.configure_repo(pull=False) - - # pull remote - if not component.update_remote_branch(): - return False - - component.configure_branch() - - # do we have something to merge? 
- try: - needs_merge = component.repo_needs_merge() - except RepositoryError: - # Not yet configured repository - needs_merge = True - - if not needs_merge: - component.delete_alert("MergeFailure") - component.delete_alert("RepositoryOutdated") - return True - - # commit possible pending changes if needed - if component.needs_commit_upstream(): - component.commit_pending( - "update", request.user if request else None, skip_push=True - ) - - # update local branch - try: - result = component.update_branch(request, method=None, skip_push=True) - except RepositoryError: - result = False - - if result: - # Push after possible merge (create_translations is called by caller) - component.push_if_needed(do_update=False) - - if not component.repo_needs_push(): - component.delete_alert("RepositoryChanges") - - component.progress_step(100) - component.translations_count = None - - return result - - def _sync_component_for_translation( - self, component: Component, request, *, created: bool - ) -> None: - """Ensure repo/translations are ready before add_language_to_component. Idempotent.""" - if not component.is_repo_link: - try: - # For a newly created repo-owner component the VCS directory does not - # exist yet. sync_git_repo(validate=False) clones when is_valid() is - # False, then configures the repo and branch — exactly what the ORM- - # save path would do. For existing components we skip straight to the - # lighter _do_update_git_only (fetch + merge only). 
- if created and not component.repository.is_valid(): - component.sync_git_repo(skip_push=True) - LOGGER.info( - "Initial clone completed for new component: %s", component.name - ) - else: - result = self._do_update_git_only(component, request) - if result: - LOGGER.info("Updated component repository: %s", component.name) - else: - LOGGER.warning( - "Git update did not succeed for %s", component.name - ) - except Exception as e: - LOGGER.warning( - "Failed to %s %s: %s", - "clone/update new component" if created else "update component", - component.name, - e, - ) - report_error( - cause="Component creation" if created else "Component update" - ) - try: - component.create_translations_immediate(request=request, force=True) - LOGGER.info( - "%s: %s", - "Loaded translations for new repo link" - if created - else "Refreshed translations for repo link", - component.name, - ) - except Exception as e: - LOGGER.warning( - "Failed to %s %s: %s", - "load translations for new link" - if created - else "refresh translations for", - component.name, - e, - ) - - def add_language_to_component(self, component: Component, request=None) -> bool: - """ - Add language to component if not already added. - - Logic matches API view ComponentViewSet.translations (POST). - """ - if request is None: - LOGGER.error("add_language_to_component requires request for permissions") - return False - - try: - language = Language.objects.get(code=self.lang_code) - except Language.DoesNotExist: - LOGGER.error("Language %s not found", self.lang_code) - return False - - if component.translation_set.filter(language=language).exists(): - LOGGER.info( - "Language %s already exists in %s", self.lang_code, component.name - ) - return True - - # Check order: (1) permission, (2) language in allowed set, (3) sync, (4) policy/validity, (5) add. - # (1) has_perm("translation.add"): permission only, no I/O; fail fast. 
- if not request.user.has_perm("translation.add", component): - LOGGER.warning( - "Can not create translation: no translation.add on %s", component.name - ) - return False - - # (2) get_all_available_languages() + add_more filter: DB only. Ensure lang_code is in the - # allowed set (not already in component; if user lacks add_more, restrict to basic/project - # languages). Fail fast before any I/O so we do not sync when language is not addable. - base_languages = cast( - "LanguageQuerySet", component.get_all_available_languages() - ) - if not request.user.has_perm("translation.add_more", component): - base_languages = base_languages.filter_for_add(component.project) - if not base_languages.filter(pk=language.pk).exists(): - LOGGER.error( - "Could not add %r to %s (language not available)", - self.lang_code, - component.name, - ) - return False - - # (3) create_translations_immediate: loads translations and ensures template/new_base - # are on disk. Required before (4) because can_add_new_language checks file existence - # and template validity. - try: - component.create_translations_immediate(request=request, force=True) - except Exception as e: - LOGGER.warning("create_translations_immediate before add language: %s", e) - return False - - # (4) can_add_new_language: checks new_lang config, template/new_base existence and - # validity, is_valid_base_for_new. Depends on (3) so files exist. - if not component.can_add_new_language(request.user): - reason = ( - getattr(component, "new_lang_error_message", None) - or "Can not add new language" - ) - LOGGER.warning( - "Could not add language %s to %s: %s", - self.lang_code, - component.name, - reason, - ) - return False - - # (5) add_new_language: creates translation file and DB record. Depends on (3) and (4). 
- try: - translation = component.add_new_language(language, request) - except Exception as e: - LOGGER.error("Failed to add language %s: %s", self.lang_code, e) - report_error(cause="Add language") - return False - - if translation is None: - storage = get_messages(request) - message = ( - "\n".join(m.message for m in storage) - if storage - else ( - getattr(component, "new_lang_error_message", None) - or f"Could not add {self.lang_code!r}!" - ) - ) - LOGGER.warning( - "Could not add language %s to %s: %s", - self.lang_code, - component.name, - message, - ) - return False - - time.sleep(settings.BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS) - - LOGGER.info("Added language %s to %s", self.lang_code, component.name) - return True - - def _delete_component_and_commit_removal( - self, component: Component, result: dict[str, Any] - ) -> None: - """ - Delete component, remove its translation files from disk, commit and push. - - Updates result["components_deleted"] and result["errors"] as needed. - """ - name = component.name - base_path = component.full_path - repo_owner = component.linked_component if component.is_repo_link else component - if repo_owner is None: - LOGGER.warning( - "Cannot push after delete: no linked component for %s", component.slug - ) - push_branch = None - push_url = None - else: - push_branch = repo_owner.push_branch - push_url = repo_owner.push - translation_files = [ - os.path.join(base_path, t.filename) - for t in component.translation_set.exclude( - language=component.source_language - ) - ] - component.delete() - - actually_removed = [] - for file_path in translation_files: - if os.path.isfile(file_path): - try: - os.remove(file_path) - actually_removed.append(file_path) - LOGGER.info("Removed translation file: %s", file_path) - except OSError as e: - LOGGER.warning( - "Failed to remove translation file %s: %s", - file_path, - e, - ) - result["errors"].append(f"Failed to remove {file_path}: {e}") - - if actually_removed and 
os.path.isdir(os.path.join(base_path, ".git")): - try: - # Stage only the removed files (not all tracked changes) - rel_paths = [os.path.relpath(p, base_path) for p in actually_removed] - subprocess.run( - ["git", "-C", base_path, "add", "--", *rel_paths], - check=True, - capture_output=True, - timeout=60, - ) - status = subprocess.run( - ["git", "-C", base_path, "status", "--porcelain"], - capture_output=True, - text=True, - timeout=10, - check=False, - ) - if status.stdout.strip(): - author = ( - f"{getattr(settings, 'DEFAULT_COMMITER_NAME', 'Weblate')} " - f"<{getattr(settings, 'DEFAULT_COMMITER_EMAIL', 'noreply@weblate.org')}>" - ) - subprocess.run( - [ - "git", - "-C", - base_path, - "commit", - "-m", - f"Remove translation files for deleted component: {name}", - "--author", - author, - ], - check=True, - capture_output=True, - timeout=30, - ) - LOGGER.info("Committed deletion of translation files for: %s", name) - if push_url and push_branch: - # Push current branch to remote push_branch - subprocess.run( - [ - "git", - "-C", - base_path, - "push", - "origin", - f"HEAD:{push_branch}", - ], - check=True, - capture_output=True, - timeout=120, - ) - LOGGER.info("Pushed to origin %s", push_branch) - except subprocess.CalledProcessError as e: - LOGGER.warning("Git commit/push failed for %s: %s", name, e.stderr or e) - result["errors"].append(f"Git commit/push failed: {e.stderr or e}") - except subprocess.TimeoutExpired: - LOGGER.warning("Git commit/push timeout for %s", name) - result["errors"].append("Git commit/push timeout") - - result["components_deleted"] += 1 - LOGGER.info("Deleted component (not in configs): %s", name) - - def process_submodule( - self, submodule: str, user=None, request=None - ) -> dict[str, Any]: - """Process a single submodule: clone, scan, create/update components.""" - if self.temp_dir is None: - msg = "process_submodule requires temp_dir; call process_all() instead" - raise TypeError(msg) - result: dict[str, Any] = { - "submodule": 
submodule, - "success": False, - "components_created": 0, - "components_updated": 0, - "components_deleted": 0, - "errors": [], - } - - # Create temp directory for this submodule - temp_submodule_dir = os.path.join(self.temp_dir, submodule) - resolved = Path(temp_submodule_dir).resolve() - temp_dir_resolved = Path(self.temp_dir).resolve() - try: - resolved.relative_to(temp_dir_resolved) - except ValueError: - result["errors"].append(f"Invalid submodule name: {submodule}") - return result - os.makedirs(temp_submodule_dir, exist_ok=True) - - # Clone repository - if not self.clone_repository( - submodule, temp_submodule_dir, f"local-{self.lang_code}" - ): - result["errors"].append(f"Failed to clone repository for {submodule}") - return result - - # Scan for documentation files - configs = self.scan_documentation_files(temp_submodule_dir) - if not configs: - result["errors"].append( - f"No supported documentation files found in {submodule}" - ) - return result - - LOGGER.info("Found %s documentation files in %s", len(configs), submodule) - - # Check permissions before creating so no Project is committed when denied - project_slug = f"boost-{_submodule_slug(submodule)}-documentation" - existing_project = Project.objects.filter(slug=project_slug).first() - if request is not None and user is not None: - if existing_project is not None: - if not user.has_perm("project.edit", existing_project): - result["errors"].append( - "Can not create components (missing project.edit)" - ) - return result - elif not user.has_perm("project.add"): - result["errors"].append("Can not create project (missing project.add)") - return result - - # Get or create project - try: - project = self.get_or_create_project(submodule, user) - except Exception as e: - result["errors"].append(f"Failed to create project: {e}") - report_error(cause="Project creation") - return result - - # Create or update components - for config in configs: - component, was_created = self.create_or_update_component( - 
project, submodule, config, user=user, request=request - ) - if component is not None: - if was_created: - result["components_created"] += 1 - else: - result["components_updated"] += 1 - - # Delete components that are not in configs (no longer in repo scan). - # Never delete glossary components (is_glossary); they are managed by Weblate. - prefix = f"boost-{_submodule_slug(submodule)}-documentation-" - wanted_slugs = { - truncate_component_slug(f"{prefix}{c['component_slug']}") for c in configs - } - for component in project.component_set.all(): - if component.slug not in wanted_slugs and not component.is_glossary: - try: - self._delete_component_and_commit_removal(component, result) - except Exception as e: - LOGGER.warning( - "Failed to delete component %s: %s", component.slug, e - ) - result["errors"].append(f"Failed to delete {component.slug}: {e}") - - result["success"] = True - return result - - def process_all( - self, submodules: list[str], user=None, request=None - ) -> dict[str, Any]: - """Process all submodules.""" - # Create temp directory - self.temp_dir = tempfile.mkdtemp(prefix="boost_endpoint_") - LOGGER.info("Using temp directory: %s", self.temp_dir) - - results: dict[str, Any] = { - "total_submodules": len(submodules), - "successful": 0, - "failed": 0, - "submodule_results": [], - } - - try: - for submodule in submodules: - LOGGER.info("Processing submodule: %s", submodule) - result = self.process_submodule(submodule, user=user, request=request) - results["submodule_results"].append(result) - - if result["success"]: - results["successful"] += 1 - else: - results["failed"] += 1 - - finally: - # Cleanup temp directory - if self.temp_dir and os.path.exists(self.temp_dir): - shutil.rmtree(self.temp_dir, ignore_errors=True) - LOGGER.info("Cleaned up temp directory: %s", self.temp_dir) - - return results +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +""" +Internal Django service for Boost documentation 
add-or-update. + +Uses only in-memory component data: no temporary JSON files. +Builds supported formats from Weblate's FILE_FORMATS (same as list_file_format_params). +Creates/updates Project and Component via Django ORM only (no external API). + +Alignment with REST API (POST /api/projects/, POST .../components/, POST .../translations/): +- Project: same as API (get_or_create + post_create when created). API does not use Celery for create. +- Component: same create + post_create; we then call do_update/create_translations_immediate so the + component is ready before adding a language. The API relies on Component.save() which schedules + component_after_save (Celery when not eager), so the API does not wait for repo/template in the request. +- Translation: same checks and add_new_language as API; we call create_translations_immediate before + so template is on disk (API assumes component was already synced). +""" + +from __future__ import annotations + +import os +import shutil +import subprocess +import tempfile +import time +from pathlib import Path +from typing import TYPE_CHECKING, Any, cast + +from django.conf import settings +from django.contrib.messages import get_messages +from django.db import transaction + +from weblate.formats.models import FILE_FORMATS +from weblate.lang.models import Language +from weblate.logger import LOGGER +from weblate.trans.models import Component, Project +from weblate.utils.errors import report_error +from weblate.vcs.base import RepositoryError + +if TYPE_CHECKING: + from weblate.lang.models import LanguageQuerySet + +# Weblate API limit for component name and slug (Component.name / Component.slug max_length) +MAX_COMPONENT_NAME_LENGTH = 100 +MAX_COMPONENT_SLUG_LENGTH = 100 +# When over limit: first 64 + " ... " + last 25 (94 chars) to keep names unique +TRUNCATE_NAME_HEAD = 64 +TRUNCATE_NAME_TAIL = 25 +TRUNCATE_NAME_SEP = " ... 
" +# Slug truncation: head + "-" + tail (100 chars max) to reduce collision risk for long paths +TRUNCATE_SLUG_HEAD = 64 +TRUNCATE_SLUG_TAIL = 35 +TRUNCATE_SLUG_SEP = "-" + + +def _submodule_slug(name: str) -> str: + """Normalize submodule name to URL-safe slug: lower case, underscores to hyphens.""" + return name.lower().replace("_", "-") + + +def truncate_component_name(name: str, max_len: int = MAX_COMPONENT_NAME_LENGTH) -> str: + """Truncate component name to max_len. If over limit: first 64 + ' ... ' + last 25.""" + if len(name) <= max_len: + return name + return name[:TRUNCATE_NAME_HEAD] + TRUNCATE_NAME_SEP + name[-TRUNCATE_NAME_TAIL:] + + +def truncate_component_slug(slug: str, max_len: int = MAX_COMPONENT_SLUG_LENGTH) -> str: + """Truncate component slug to max_len. If over limit: first 64 + '-' + last 35.""" + if len(slug) <= max_len: + return slug + return slug[:TRUNCATE_SLUG_HEAD] + TRUNCATE_SLUG_SEP + slug[-TRUNCATE_SLUG_TAIL:] + + +def _build_extension_to_format() -> dict[str, str]: + """Build extension -> format_id from Weblate FILE_FORMATS (internal API).""" + result = {} + for format_cls in FILE_FORMATS.data.values(): + format_id = getattr(format_cls, "format_id", None) + if not format_id or not getattr(format_cls, "autoload", ()): + continue + for pattern in format_cls.autoload: + # e.g. "*.adoc" -> ".adoc", "*.po" -> ".po" + if pattern.startswith("*.") and len(pattern) > 2: + ext = "." 
+ pattern[2:].lower() + result[ext] = format_id + return result + + +class BoostComponentService: + """Service for managing Boost documentation components (internal Django usage).""" + + def __init__( + self, + organization: str, + lang_code: str, + version: str, + extensions: list[str] | None = None, + ): + self.organization = organization + self.lang_code = lang_code + self.version = version + self.extensions = extensions # If None or empty, no filtering by extension list + self.temp_dir: str | None = None + self._ext_to_format: dict[str, str] | None = None + + def get_extension_to_format(self) -> dict[str, str]: + """Extension -> Weblate format_id from FILE_FORMATS.""" + if self._ext_to_format is None: + self._ext_to_format = _build_extension_to_format() + return self._ext_to_format + + def get_supported_extensions(self) -> set[str]: + """ + Set of supported file extensions (from Weblate formats). + + If self.extensions is non-empty, restrict to those that are both + Weblate-supported and in the list. + """ + supported = set(self.get_extension_to_format().keys()) + if not self.extensions: + return supported + # Normalize: ensure leading dot and lower case for comparison + allowed = set() + for e in self.extensions: + e = e.strip().lower() + if e and not e.startswith("."): + e = "." 
+ e + if e: + allowed.add(e) + return supported & allowed + + def clone_repository(self, submodule: str, target_dir: str, branch: str) -> bool: + """Clone a git repository to target directory.""" + repo_url = f"https://github.com/{self.organization}/{submodule}.git" + + try: + LOGGER.info("Cloning %s to %s", repo_url, target_dir) + cmd = ["git", "clone", "-b", branch, "--depth", "1", repo_url, target_dir] + result = subprocess.run( + cmd, + capture_output=True, + text=True, + timeout=300, + check=False, + ) + + if result.returncode != 0: + LOGGER.error("Failed to clone: %s", result.stderr) + return False + + LOGGER.info("Cloned %s", submodule) + return True + + except subprocess.TimeoutExpired: + LOGGER.error("Clone timeout for %s", submodule) + return False + except Exception as e: + LOGGER.error("Clone exception: %s", e) + report_error(cause="Boost component clone") + return False + + def scan_documentation_files(self, repo_dir: str) -> list[dict[str, Any]]: + """ + Scan repo for doc files; return list of in-memory component configs. + + Only files in subfolders are included; files in repo root are skipped. + Uses get_supported_extensions() which respects self.extensions when set. + """ + supported_exts = self.get_supported_extensions() + configs = [] + + for root, dirs, files in os.walk(repo_dir): + # Skip hidden directories and common non-doc directories + dirs[:] = [ + d + for d in dirs + if not d.startswith(".") and d not in {"__pycache__", "node_modules"} + ] + + for file in files: + file_path = Path(root) / file + ext = file_path.suffix.lower() + + if ext not in supported_exts: + continue + + # Exclude translation files: filename like *_{lang_code} (e.g. 
intro_zh_Hans.adoc) + if file_path.stem.endswith("_" + self.lang_code): + continue + + relative_path = file_path.relative_to(repo_dir) + # Skip files in repo root (only include files in subfolders) + if len(relative_path.parts) <= 1: + continue + + config = self.generate_component_config(str(relative_path), ext) + if config: + configs.append(config) + + return configs + + def generate_component_config( + self, file_path: str, extension: str + ) -> dict[str, Any] | None: + """Build in-memory component config for a doc file (no JSON file written).""" + ext_to_fmt = self.get_extension_to_format() + file_format = ext_to_fmt.get(extension) + if not file_format: + return None + + # Extract file name without extension + path_obj = Path(file_path) + filename_base = path_obj.stem + dir_path = path_obj.parent + + # Generate component name from path (include extension so doc/intro.adoc vs doc/intro.md differ) + component_name_parts: list[str] = [] + if str(dir_path) != ".": + component_name_parts.extend(dir_path.parts) + component_name_parts.append(filename_base) + ext_display = extension.lstrip(".").lower() + component_name = " / ".join( + part.replace("_", " ").replace("-", " ").title() + for part in component_name_parts + ) + component_name = f"{component_name} ({ext_display})" + + # Generate slug (include extension so doc/intro.adoc vs doc/intro.md differ) + slug_parts = [part.lower().replace("_", "-") for part in component_name_parts] + slug_parts.append(extension.lstrip(".").lower()) + component_slug = "-".join(slug_parts) + + # File mask for translations (e.g., "doc/intro_*.adoc" for "doc/intro.adoc") + filemask = str(dir_path / f"{filename_base}_*{extension}") + template = file_path + new_base = file_path + + return { + "component_name": component_name, + "component_slug": component_slug, + "filemask": filemask, + "template": template, + "new_base": new_base, + "file_format": file_format, + "file_path": file_path, + } + + def get_or_create_project(self, submodule: 
str, user=None) -> Project: + """Get or create a Weblate project for the submodule.""" + slug = _submodule_slug(submodule) + submodule_title = submodule.replace("_", " ").title() + project_name = f"Boost {submodule_title} Translation ({self.lang_code})" + project_slug = f"boost-{slug}-documentation-{self.lang_code}" + project_web = ( + f"https://www.boost.org/doc/libs/master/libs/{submodule}/doc/html/" + ) + + with transaction.atomic(): + project, created = Project.objects.get_or_create( + slug=project_slug, + defaults={ + "name": project_name, + "web": project_web, + "instructions": ( + f"Please translate the Boost.{submodule.replace('_', ' ').title()} " + "documentation. Maintain technical accuracy and follow exact " + "formatting conventions." + ), + "access_control": Project.ACCESS_PUBLIC, + "commit_policy": 0, + }, + ) + + if created: + LOGGER.info("Created project: %s", project_name) + # Match API: ProjectViewSet.create uses perform_create -> post_create(user, billing). + if user: + project.post_create(user, billing=None) + else: + LOGGER.info("Project exists: %s", project_name) + + if user: + project.acting_user = user + + return project + + def create_or_update_component( + self, + project: Project, + submodule: str, + config: dict[str, Any], + user=None, + request=None, + ) -> tuple[Component | None, bool]: + """ + Create or update a component. Returns (component, was_created). + + Settings and logic aligned with scripts/auto/create_component.py and + scripts/auto/boost-submodule-component-configs/setup_boost-*-.json + (same as API POST projects/{project_slug}/components/). 
+ """ + required_config_keys = { + "component_slug", + "component_name", + "filemask", + "template", + "new_base", + "file_format", + } + missing = required_config_keys - set(config.keys()) + if missing: + LOGGER.error("Invalid component config: missing keys %s", missing) + return None, False + + slug = _submodule_slug(submodule) + component_slug = truncate_component_slug( + f"boost-{slug}-documentation-{config['component_slug']}" + ) + # Push branch name: translation-{self.lang_code}-{self.version} + push_branch = f"translation-{self.lang_code}-{self.version}" + + # Component name: "Boost {Submodule} Documentation / Doc / Library Detail" + submodule_title = submodule.replace("_", " ").title() + component_name = truncate_component_name( + f"Boost {submodule_title} Documentation / {config['component_name']}" + ) + + # Source language: "en" (hardcoded) + try: + source_language = Language.objects.get(code="en") + except Language.DoesNotExist: + LOGGER.error("Source language 'en' not found; cannot create component") + report_error(cause="Component creation/update") + return None, False + + # Single clone per repo: first component gets real repo, others use weblate:// + real_repo = f"git@github.com:{self.organization}/{submodule}.git" + repo_owner = ( + Component.objects.filter(project=project, repo=real_repo) + .order_by("slug") + .first() + ) + if repo_owner is not None: + # Another component already has the clone; link to it + repo_url = f"weblate://{project.slug}/{repo_owner.slug}" + push_url = "" + else: + repo_url = real_repo + push_url = real_repo + + # Component defaults aligned with create_component.py / reference JSON + component_defaults = { + "name": component_name, + "vcs": "github", + "repo": repo_url, + "push": push_url, + "branch": f"local-{self.lang_code}", + "push_branch": push_branch, + "filemask": config["filemask"], + "template": config["template"], + "new_base": config["new_base"], + "file_format": config["file_format"], + "edit_template": False, + 
"source_language": source_language, + "license": "", + "allow_translation_propagation": False, + "enable_suggestions": True, + "suggestion_voting": False, + "suggestion_autoaccept": 0, + "check_flags": "", + "language_regex": f"^{self.lang_code}$", + "manage_units": False, + } + + try: + # Ensure project still exists (e.g. not deleted by another process) + if not Project.objects.filter(pk=project.pk).exists(): + project = self.get_or_create_project(submodule, user=user) + with transaction.atomic(): + component, created = Component.objects.get_or_create( + project=project, + slug=component_slug, + defaults=component_defaults, + ) + + if user: + component.acting_user = user + + if created: + LOGGER.info("Created component: %s", component.name) + # Match API: ProjectViewSet.components (POST) calls instance.post_create(user, origin="api") + if user: + component.post_create(user, origin="boost_endpoint") + # Synchronization: ensure repo/translations exist before add_language_to_component. + self._sync_component_for_translation( + component, request, created=True + ) + else: + LOGGER.info("Component exists: %s", component.name) + # Ensure branch is "local-{lang_code}" (avoid "fatal: no such branch: 'master'" + # when remote has no master/main) + update_fields = [] + if component.push_branch != push_branch: + component.push_branch = push_branch + update_fields.append("push_branch") + if update_fields: + component.save(update_fields=update_fields) + + # Trigger git pull only for repo owner; linked components share the same lock. 
+ self._sync_component_for_translation( + component, request, created=False + ) + self.add_language_to_component(component, request) + + return component, created + + except Exception as e: + LOGGER.error( + "Failed to create/update component (%s): %s", + type(e).__name__, + e, + ) + report_error(cause="Component creation/update") + return None, False + + def _do_update_git_only(self, component: Component, request) -> bool: + """ + Perform only the git update (fetch, merge/rebase). Does not call create_translations. + + Mirrors Component.do_update lock block + push_if_needed; caller must call + create_translations_immediate after. + """ + component.translations_progress = 0 + component.translations_count = 0 + # Hold lock all time here to avoid somebody writing between commit + # and merge/rebase. + with component.repository.lock: + component.store_background_task() + component.progress_step(0) + component.configure_repo(pull=False) + + # pull remote + if not component.update_remote_branch(): + return False + + component.configure_branch() + + # do we have something to merge? 
+ try: + needs_merge = component.repo_needs_merge() + except RepositoryError: + # Not yet configured repository + needs_merge = True + + if not needs_merge: + component.delete_alert("MergeFailure") + component.delete_alert("RepositoryOutdated") + return True + + # commit possible pending changes if needed + if component.needs_commit_upstream(): + component.commit_pending( + "update", request.user if request else None, skip_push=True + ) + + # update local branch + try: + result = component.update_branch(request, method=None, skip_push=True) + except RepositoryError: + result = False + + if result: + # Push after possible merge (create_translations is called by caller) + component.push_if_needed(do_update=False) + + if not component.repo_needs_push(): + component.delete_alert("RepositoryChanges") + + component.progress_step(100) + component.translations_count = None + + return result + + def _sync_component_for_translation( + self, component: Component, request, *, created: bool + ) -> None: + """Ensure repo/translations are ready before add_language_to_component. Idempotent.""" + if not component.is_repo_link: + try: + # For a newly created repo-owner component the VCS directory does not + # exist yet. sync_git_repo(validate=False) clones when is_valid() is + # False, then configures the repo and branch — exactly what the ORM- + # save path would do. For existing components we skip straight to the + # lighter _do_update_git_only (fetch + merge only). 
+ if created and not component.repository.is_valid(): + component.sync_git_repo(skip_push=True) + LOGGER.info( + "Initial clone completed for new component: %s", component.name + ) + else: + result = self._do_update_git_only(component, request) + if result: + LOGGER.info("Updated component repository: %s", component.name) + else: + LOGGER.warning( + "Git update did not succeed for %s", component.name + ) + except Exception as e: + LOGGER.warning( + "Failed to %s %s: %s", + "clone/update new component" if created else "update component", + component.name, + e, + ) + report_error( + cause="Component creation" if created else "Component update" + ) + try: + component.create_translations_immediate(request=request, force=True) + LOGGER.info( + "%s: %s", + "Loaded translations for new repo link" + if created + else "Refreshed translations for repo link", + component.name, + ) + except Exception as e: + LOGGER.warning( + "Failed to %s %s: %s", + "load translations for new link" + if created + else "refresh translations for", + component.name, + e, + ) + + def add_language_to_component(self, component: Component, request=None) -> bool: + """ + Add language to component if not already added. + + Logic matches API view ComponentViewSet.translations (POST). + """ + if request is None: + LOGGER.error("add_language_to_component requires request for permissions") + return False + + try: + language = Language.objects.get(code=self.lang_code) + except Language.DoesNotExist: + LOGGER.error("Language %s not found", self.lang_code) + return False + + if component.translation_set.filter(language=language).exists(): + LOGGER.info( + "Language %s already exists in %s", self.lang_code, component.name + ) + return True + + # Check order: (1) permission, (2) language in allowed set, (3) sync, (4) policy/validity, (5) add. + # (1) has_perm("translation.add"): permission only, no I/O; fail fast. 
+ if not request.user.has_perm("translation.add", component): + LOGGER.warning( + "Can not create translation: no translation.add on %s", component.name + ) + return False + + # (2) get_all_available_languages() + add_more filter: DB only. Ensure lang_code is in the + # allowed set (not already in component; if user lacks add_more, restrict to basic/project + # languages). Fail fast before any I/O so we do not sync when language is not addable. + base_languages = cast( + "LanguageQuerySet", component.get_all_available_languages() + ) + if not request.user.has_perm("translation.add_more", component): + base_languages = base_languages.filter_for_add(component.project) + if not base_languages.filter(pk=language.pk).exists(): + LOGGER.error( + "Could not add %r to %s (language not available)", + self.lang_code, + component.name, + ) + return False + + # (3) create_translations_immediate: loads translations and ensures template/new_base + # are on disk. Required before (4) because can_add_new_language checks file existence + # and template validity. + try: + component.create_translations_immediate(request=request, force=True) + except Exception as e: + LOGGER.warning("create_translations_immediate before add language: %s", e) + return False + + # (4) can_add_new_language: checks new_lang config, template/new_base existence and + # validity, is_valid_base_for_new. Depends on (3) so files exist. + if not component.can_add_new_language(request.user): + reason = ( + getattr(component, "new_lang_error_message", None) + or "Can not add new language" + ) + LOGGER.warning( + "Could not add language %s to %s: %s", + self.lang_code, + component.name, + reason, + ) + return False + + # (5) add_new_language: creates translation file and DB record. Depends on (3) and (4). 
+ try: + translation = component.add_new_language(language, request) + except Exception as e: + LOGGER.error("Failed to add language %s: %s", self.lang_code, e) + report_error(cause="Add language") + return False + + if translation is None: + storage = get_messages(request) + message = ( + "\n".join(m.message for m in storage) + if storage + else ( + getattr(component, "new_lang_error_message", None) + or f"Could not add {self.lang_code!r}!" + ) + ) + LOGGER.warning( + "Could not add language %s to %s: %s", + self.lang_code, + component.name, + message, + ) + return False + + time.sleep(settings.BOOST_ENDPOINT_ADD_TRANSLATION_SECONDS) + + LOGGER.info("Added language %s to %s", self.lang_code, component.name) + return True + + def _delete_component_and_commit_removal( + self, component: Component, result: dict[str, Any] + ) -> None: + """ + Delete component, remove its translation files from disk, commit and push. + + Updates result["components_deleted"] and result["errors"] as needed. + """ + name = component.name + base_path = component.full_path + repo_owner = component.linked_component if component.is_repo_link else component + if repo_owner is None: + LOGGER.warning( + "Cannot push after delete: no linked component for %s", component.slug + ) + push_branch = None + push_url = None + else: + push_branch = repo_owner.push_branch + push_url = repo_owner.push + translation_files = [ + os.path.join(base_path, t.filename) + for t in component.translation_set.exclude( + language=component.source_language + ) + ] + component.delete() + + actually_removed = [] + for file_path in translation_files: + if os.path.isfile(file_path): + try: + os.remove(file_path) + actually_removed.append(file_path) + LOGGER.info("Removed translation file: %s", file_path) + except OSError as e: + LOGGER.warning( + "Failed to remove translation file %s: %s", + file_path, + e, + ) + result["errors"].append(f"Failed to remove {file_path}: {e}") + + if actually_removed and 
os.path.isdir(os.path.join(base_path, ".git")): + try: + # Stage only the removed files (not all tracked changes) + rel_paths = [os.path.relpath(p, base_path) for p in actually_removed] + subprocess.run( + ["git", "-C", base_path, "add", "--", *rel_paths], + check=True, + capture_output=True, + timeout=60, + ) + status = subprocess.run( + ["git", "-C", base_path, "status", "--porcelain"], + capture_output=True, + text=True, + timeout=10, + check=False, + ) + if status.stdout.strip(): + author = ( + f"{getattr(settings, 'DEFAULT_COMMITER_NAME', 'Weblate')} " + f"<{getattr(settings, 'DEFAULT_COMMITER_EMAIL', 'noreply@weblate.org')}>" + ) + subprocess.run( + [ + "git", + "-C", + base_path, + "commit", + "-m", + f"Remove translation files for deleted component: {name}", + "--author", + author, + ], + check=True, + capture_output=True, + timeout=30, + ) + LOGGER.info("Committed deletion of translation files for: %s", name) + if push_url and push_branch: + # Push current branch to remote push_branch + subprocess.run( + [ + "git", + "-C", + base_path, + "push", + "origin", + f"HEAD:{push_branch}", + ], + check=True, + capture_output=True, + timeout=120, + ) + LOGGER.info("Pushed to origin %s", push_branch) + except subprocess.CalledProcessError as e: + LOGGER.warning("Git commit/push failed for %s: %s", name, e.stderr or e) + result["errors"].append(f"Git commit/push failed: {e.stderr or e}") + except subprocess.TimeoutExpired: + LOGGER.warning("Git commit/push timeout for %s", name) + result["errors"].append("Git commit/push timeout") + + result["components_deleted"] += 1 + LOGGER.info("Deleted component (not in configs): %s", name) + + def process_submodule( + self, submodule: str, user=None, request=None + ) -> dict[str, Any]: + """Process a single submodule: clone, scan, create/update components.""" + if self.temp_dir is None: + msg = "process_submodule requires temp_dir; call process_all() instead" + raise TypeError(msg) + result: dict[str, Any] = { + "submodule": 
submodule, + "success": False, + "components_created": 0, + "components_updated": 0, + "components_deleted": 0, + "errors": [], + } + + # Create temp directory for this submodule + temp_submodule_dir = os.path.join(self.temp_dir, submodule) + resolved = Path(temp_submodule_dir).resolve() + temp_dir_resolved = Path(self.temp_dir).resolve() + try: + resolved.relative_to(temp_dir_resolved) + except ValueError: + result["errors"].append(f"Invalid submodule name: {submodule}") + return result + os.makedirs(temp_submodule_dir, exist_ok=True) + + # Clone repository + if not self.clone_repository( + submodule, temp_submodule_dir, f"local-{self.lang_code}" + ): + result["errors"].append(f"Failed to clone repository for {submodule}") + return result + + # Scan for documentation files + configs = self.scan_documentation_files(temp_submodule_dir) + if not configs: + result["errors"].append( + f"No supported documentation files found in {submodule}" + ) + return result + + LOGGER.info("Found %s documentation files in %s", len(configs), submodule) + + # Check permissions before creating so no Project is committed when denied + project_slug = f"boost-{_submodule_slug(submodule)}-documentation" + existing_project = Project.objects.filter(slug=project_slug).first() + if request is not None and user is not None: + if existing_project is not None: + if not user.has_perm("project.edit", existing_project): + result["errors"].append( + "Can not create components (missing project.edit)" + ) + return result + elif not user.has_perm("project.add"): + result["errors"].append("Can not create project (missing project.add)") + return result + + # Get or create project + try: + project = self.get_or_create_project(submodule, user) + except Exception as e: + result["errors"].append(f"Failed to create project: {e}") + report_error(cause="Project creation") + return result + + # Create or update components + for config in configs: + component, was_created = self.create_or_update_component( + 
project, submodule, config, user=user, request=request + ) + if component is not None: + if was_created: + result["components_created"] += 1 + else: + result["components_updated"] += 1 + + # Delete components that are not in configs (no longer in repo scan). + # Never delete glossary components (is_glossary); they are managed by Weblate. + prefix = f"boost-{_submodule_slug(submodule)}-documentation-" + wanted_slugs = { + truncate_component_slug(f"{prefix}{c['component_slug']}") for c in configs + } + for component in project.component_set.all(): + if component.slug not in wanted_slugs and not component.is_glossary: + try: + self._delete_component_and_commit_removal(component, result) + except Exception as e: + LOGGER.warning( + "Failed to delete component %s: %s", component.slug, e + ) + result["errors"].append(f"Failed to delete {component.slug}: {e}") + + result["success"] = True + return result + + def process_all( + self, submodules: list[str], user=None, request=None + ) -> dict[str, Any]: + """Process all submodules.""" + # Create temp directory + self.temp_dir = tempfile.mkdtemp(prefix="boost_endpoint_") + LOGGER.info("Using temp directory: %s", self.temp_dir) + + results: dict[str, Any] = { + "total_submodules": len(submodules), + "successful": 0, + "failed": 0, + "submodule_results": [], + } + + try: + for submodule in submodules: + LOGGER.info("Processing submodule: %s", submodule) + result = self.process_submodule(submodule, user=user, request=request) + results["submodule_results"].append(result) + + if result["success"]: + results["successful"] += 1 + else: + results["failed"] += 1 + + finally: + # Cleanup temp directory + if self.temp_dir and os.path.exists(self.temp_dir): + shutil.rmtree(self.temp_dir, ignore_errors=True) + LOGGER.info("Cleaned up temp directory: %s", self.temp_dir) + + return results diff --git a/weblate/boost_endpoint/tasks.py b/weblate/boost_endpoint/tasks.py index 6bf016ee6757..4a149f77b41c 100755 --- 
a/weblate/boost_endpoint/tasks.py +++ b/weblate/boost_endpoint/tasks.py @@ -1,45 +1,43 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" - -from __future__ import annotations - -from typing import Any - -from weblate.auth.models import AuthenticatedHttpRequest, User -from weblate.boost_endpoint.services import BoostComponentService -from weblate.utils.celery import app - - -@app.task(trail=False) -def boost_add_or_update_task( - *, - organization: str, - add_or_update: dict[str, list[str]], - version: str, - extensions: list[str] | None, - user_id: int, -) -> dict[str, Any]: - """ - Run BoostComponentService for each language (same logic as synchronous POST). - - Exceptions propagate so Celery marks the task failed and monitoring can alert. - """ - user = User.objects.get(pk=user_id) - request = AuthenticatedHttpRequest() - request.user = user - - results: dict[str, Any] = {} - for lang_code, submodules in add_or_update.items(): - service = BoostComponentService( - organization=organization, - lang_code=lang_code, - version=version, - extensions=extensions, - ) - results[lang_code] = service.process_all( - submodules, user=user, request=request - ) - return results +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" + +from __future__ import annotations + +from typing import Any + +from weblate.auth.models import AuthenticatedHttpRequest, User +from weblate.boost_endpoint.services import BoostComponentService +from weblate.utils.celery import app + + +@app.task(trail=False) +def boost_add_or_update_task( + *, + organization: str, + add_or_update: dict[str, list[str]], + version: str, + extensions: list[str] | None, + user_id: int, +) -> dict[str, Any]: + """ + Run BoostComponentService for each language (same logic as synchronous POST). 
+ + Exceptions propagate so Celery marks the task failed and monitoring can alert. + """ + user = User.objects.get(pk=user_id) + request = AuthenticatedHttpRequest() + request.user = user + + results: dict[str, Any] = {} + for lang_code, submodules in add_or_update.items(): + service = BoostComponentService( + organization=organization, + lang_code=lang_code, + version=version, + extensions=extensions, + ) + results[lang_code] = service.process_all(submodules, user=user, request=request) + return results diff --git a/weblate/boost_endpoint/urls.py b/weblate/boost_endpoint/urls.py index 8ad08d05bc0b..7d90ea7946d6 100755 --- a/weblate/boost_endpoint/urls.py +++ b/weblate/boost_endpoint/urls.py @@ -1,12 +1,12 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from django.urls import path - -from weblate.boost_endpoint.views import AddOrUpdateView, BoostEndpointInfo - -urlpatterns = [ - path("", BoostEndpointInfo.as_view(), name="info"), - path("add-or-update/", AddOrUpdateView.as_view(), name="add-or-update"), -] +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from django.urls import path + +from weblate.boost_endpoint.views import AddOrUpdateView, BoostEndpointInfo + +urlpatterns = [ + path("", BoostEndpointInfo.as_view(), name="info"), + path("add-or-update/", AddOrUpdateView.as_view(), name="add-or-update"), +] diff --git a/weblate/boost_endpoint/views.py b/weblate/boost_endpoint/views.py index 3f2d9f45cbf2..b012d2bbebc9 100755 --- a/weblate/boost_endpoint/views.py +++ b/weblate/boost_endpoint/views.py @@ -1,73 +1,73 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -from __future__ import annotations - -from rest_framework import status -from rest_framework.permissions import IsAuthenticated -from rest_framework.response import Response -from rest_framework.views import APIView - -from weblate.boost_endpoint.serializers import AddOrUpdateRequestSerializer 
-from weblate.boost_endpoint.tasks import boost_add_or_update_task - - -class BoostEndpointInfo(APIView): - """Boost documentation translation API info.""" - - permission_classes = (IsAuthenticated,) - - def get(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 - """Return Boost endpoint module info.""" - return Response( - { - "module": "boost-endpoint", - "description": "Boost documentation translation API", - } - ) - - -class AddOrUpdateView(APIView): - """Add or update Boost documentation components.""" - - permission_classes = (IsAuthenticated,) - - def post(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 - """ - Create or update Boost documentation components. - - add_or_update is a map: lang_code -> [submodule names]. For each lang_code - the service runs with that language and its submodule list (clone, scan, - create/update project and components, add language). - - Heavy work runs in a Celery worker and returns immediately with HTTP 202 and - task_id so clients can validate the request without waiting for completion. - """ - serializer = AddOrUpdateRequestSerializer(data=request.data) - if not serializer.is_valid(): - return Response( - {"errors": serializer.errors}, - status=status.HTTP_400_BAD_REQUEST, - ) - - data = serializer.validated_data - async_result = boost_add_or_update_task.delay( - organization=data["organization"], - add_or_update=data["add_or_update"], - version=data["version"], - extensions=data.get("extensions"), - user_id=request.user.pk, - ) - - return Response( - { - "status": "accepted", - "task_id": str(async_result.id), - "detail": ( - "Boost add-or-update is running in the background; " - "check Celery logs or task result for completion." 
- ), - }, - status=status.HTTP_202_ACCEPTED, - ) +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +from __future__ import annotations + +from rest_framework import status +from rest_framework.permissions import IsAuthenticated +from rest_framework.response import Response +from rest_framework.views import APIView + +from weblate.boost_endpoint.serializers import AddOrUpdateRequestSerializer +from weblate.boost_endpoint.tasks import boost_add_or_update_task + + +class BoostEndpointInfo(APIView): + """Boost documentation translation API info.""" + + permission_classes = (IsAuthenticated,) + + def get(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 + """Return Boost endpoint module info.""" + return Response( + { + "module": "boost-endpoint", + "description": "Boost documentation translation API", + } + ) + + +class AddOrUpdateView(APIView): + """Add or update Boost documentation components.""" + + permission_classes = (IsAuthenticated,) + + def post(self, request, format=None): # pylint: disable=redefined-builtin # noqa: A002 + """ + Create or update Boost documentation components. + + add_or_update is a map: lang_code -> [submodule names]. For each lang_code + the service runs with that language and its submodule list (clone, scan, + create/update project and components, add language). + + Heavy work runs in a Celery worker and returns immediately with HTTP 202 and + task_id so clients can validate the request without waiting for completion. 
+ """ + serializer = AddOrUpdateRequestSerializer(data=request.data) + if not serializer.is_valid(): + return Response( + {"errors": serializer.errors}, + status=status.HTTP_400_BAD_REQUEST, + ) + + data = serializer.validated_data + async_result = boost_add_or_update_task.delay( + organization=data["organization"], + add_or_update=data["add_or_update"], + version=data["version"], + extensions=data.get("extensions"), + user_id=request.user.pk, + ) + + return Response( + { + "status": "accepted", + "task_id": str(async_result.id), + "detail": ( + "Boost add-or-update is running in the background; " + "check Celery logs or task result for completion." + ), + }, + status=status.HTTP_202_ACCEPTED, + ) From 5da2b1875f535eedc6979b598eeecbf86211f7dc Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 11:33:51 -0600 Subject: [PATCH 09/15] Restore some parts of boost_endpoint. --- weblate/boost_endpoint/__init__.py | 0 weblate/boost_endpoint/apps.py | 0 weblate/boost_endpoint/serializers.py | 0 weblate/boost_endpoint/services.py | 68 ++++++++++++--------- weblate/boost_endpoint/tasks.py | 88 ++++++++++++++------------- weblate/boost_endpoint/urls.py | 0 6 files changed, 85 insertions(+), 71 deletions(-) mode change 100755 => 100644 weblate/boost_endpoint/__init__.py mode change 100755 => 100644 weblate/boost_endpoint/apps.py mode change 100755 => 100644 weblate/boost_endpoint/serializers.py mode change 100755 => 100644 weblate/boost_endpoint/urls.py diff --git a/weblate/boost_endpoint/__init__.py b/weblate/boost_endpoint/__init__.py old mode 100755 new mode 100644 diff --git a/weblate/boost_endpoint/apps.py b/weblate/boost_endpoint/apps.py old mode 100755 new mode 100644 diff --git a/weblate/boost_endpoint/serializers.py b/weblate/boost_endpoint/serializers.py old mode 100755 new mode 100644 diff --git a/weblate/boost_endpoint/services.py b/weblate/boost_endpoint/services.py index fcdef71e9759..6474453167f6 100755 --- a/weblate/boost_endpoint/services.py +++ 
b/weblate/boost_endpoint/services.py @@ -20,6 +20,7 @@ from __future__ import annotations +import hashlib import os import shutil import subprocess @@ -35,6 +36,7 @@ from weblate.formats.models import FILE_FORMATS from weblate.lang.models import Language from weblate.logger import LOGGER +from weblate.trans.defines import COMPONENT_NAME_LENGTH from weblate.trans.models import Component, Project from weblate.utils.errors import report_error from weblate.vcs.base import RepositoryError @@ -42,17 +44,16 @@ if TYPE_CHECKING: from weblate.lang.models import LanguageQuerySet -# Weblate API limit for component name and slug (Component.name / Component.slug max_length) -MAX_COMPONENT_NAME_LENGTH = 100 -MAX_COMPONENT_SLUG_LENGTH = 100 -# When over limit: first 64 + " ... " + last 25 (94 chars) to keep names unique -TRUNCATE_NAME_HEAD = 64 -TRUNCATE_NAME_TAIL = 25 -TRUNCATE_NAME_SEP = " ... " -# Slug truncation: head + "-" + tail (100 chars max) to reduce collision risk for long paths -TRUNCATE_SLUG_HEAD = 64 -TRUNCATE_SLUG_TAIL = 35 -TRUNCATE_SLUG_SEP = "-" +# Component.name / Component.slug max_length — imported from weblate.trans.defines so this +# always matches the actual database column constraint (100 as of this writing). +MAX_COMPONENT_NAME_LENGTH = COMPONENT_NAME_LENGTH +MAX_COMPONENT_SLUG_LENGTH = COMPONENT_NAME_LENGTH +# When over limit: keep first (max_len - 10) chars and append "[<8-hex-hash>]" (10 chars) so the +# result is always <= max_len and is unique for any two distinct full names. +TRUNCATE_NAME_HASH_LEN = 8 # 1 "[" + 8 hex + 1 "]" = 10 chars suffix +# Slug truncation: keep first (max_len - 9) chars and append "-<8-hex>" (9 chars). +# Uses URL-safe hex only (no brackets) and guarantees uniqueness the same way as name truncation. 
+TRUNCATE_SLUG_HASH_LEN = 8 # 1 "-" + 8 hex = 9 chars suffix def _submodule_slug(name: str) -> str: @@ -61,17 +62,37 @@ def _submodule_slug(name: str) -> str: def truncate_component_name(name: str, max_len: int = MAX_COMPONENT_NAME_LENGTH) -> str: - """Truncate component name to max_len. If over limit: first 64 + ' ... ' + last 25.""" + """ + Truncate component name to max_len. + + If over limit: keep first (max_len - 10) chars and append "[<8-hex>]" (10 chars) derived + from the full name's SHA-256. This guarantees uniqueness: two distinct full names always + produce distinct truncated names (collision probability ≈ 1/16^8, negligible). + """ if len(name) <= max_len: return name - return name[:TRUNCATE_NAME_HEAD] + TRUNCATE_NAME_SEP + name[-TRUNCATE_NAME_TAIL:] + hash_suffix = ( + "[" + hashlib.sha256(name.encode()).hexdigest()[:TRUNCATE_NAME_HASH_LEN] + "]" + ) + head_len = max_len - len(hash_suffix) + return name[:head_len] + hash_suffix def truncate_component_slug(slug: str, max_len: int = MAX_COMPONENT_SLUG_LENGTH) -> str: - """Truncate component slug to max_len. If over limit: first 64 + '-' + last 35.""" + """ + Truncate component slug to max_len. + + If over limit: keep first (max_len - 9) chars and append "-<8-hex>" derived from the + slug's SHA-256. Uses only URL-safe characters (lowercase hex + hyphen) and guarantees + uniqueness for any two distinct full slugs. 
+ """ if len(slug) <= max_len: return slug - return slug[:TRUNCATE_SLUG_HEAD] + TRUNCATE_SLUG_SEP + slug[-TRUNCATE_SLUG_TAIL:] + hash_suffix = ( + "-" + hashlib.sha256(slug.encode()).hexdigest()[:TRUNCATE_SLUG_HASH_LEN] + ) + head_len = max_len - len(hash_suffix) + return slug[:head_len] + hash_suffix def _build_extension_to_format() -> dict[str, str]: @@ -315,18 +336,12 @@ def create_or_update_component( LOGGER.error("Invalid component config: missing keys %s", missing) return None, False - slug = _submodule_slug(submodule) - component_slug = truncate_component_slug( - f"boost-{slug}-documentation-{config['component_slug']}" - ) + component_slug = truncate_component_slug(config["component_slug"]) # Push branch name: translation-{self.lang_code}-{self.version} push_branch = f"translation-{self.lang_code}-{self.version}" - # Component name: "Boost {Submodule} Documentation / Doc / Library Detail" - submodule_title = submodule.replace("_", " ").title() - component_name = truncate_component_name( - f"Boost {submodule_title} Documentation / {config['component_name']}" - ) + # Component name: path-based, e.g. "Doc / Modules / Root / Pages / Intro (adoc)" + component_name = truncate_component_name(config["component_name"]) # Source language: "en" (hardcoded) try: @@ -824,10 +839,7 @@ def process_submodule( # Delete components that are not in configs (no longer in repo scan). # Never delete glossary components (is_glossary); they are managed by Weblate. 
- prefix = f"boost-{_submodule_slug(submodule)}-documentation-" - wanted_slugs = { - truncate_component_slug(f"{prefix}{c['component_slug']}") for c in configs - } + wanted_slugs = {truncate_component_slug(c["component_slug"]) for c in configs} for component in project.component_set.all(): if component.slug not in wanted_slugs and not component.is_glossary: try: diff --git a/weblate/boost_endpoint/tasks.py b/weblate/boost_endpoint/tasks.py index 4a149f77b41c..6bf016ee6757 100755 --- a/weblate/boost_endpoint/tasks.py +++ b/weblate/boost_endpoint/tasks.py @@ -1,43 +1,45 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" - -from __future__ import annotations - -from typing import Any - -from weblate.auth.models import AuthenticatedHttpRequest, User -from weblate.boost_endpoint.services import BoostComponentService -from weblate.utils.celery import app - - -@app.task(trail=False) -def boost_add_or_update_task( - *, - organization: str, - add_or_update: dict[str, list[str]], - version: str, - extensions: list[str] | None, - user_id: int, -) -> dict[str, Any]: - """ - Run BoostComponentService for each language (same logic as synchronous POST). - - Exceptions propagate so Celery marks the task failed and monitoring can alert. 
- """ - user = User.objects.get(pk=user_id) - request = AuthenticatedHttpRequest() - request.user = user - - results: dict[str, Any] = {} - for lang_code, submodules in add_or_update.items(): - service = BoostComponentService( - organization=organization, - lang_code=lang_code, - version=version, - extensions=extensions, - ) - results[lang_code] = service.process_all(submodules, user=user, request=request) - return results +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" + +from __future__ import annotations + +from typing import Any + +from weblate.auth.models import AuthenticatedHttpRequest, User +from weblate.boost_endpoint.services import BoostComponentService +from weblate.utils.celery import app + + +@app.task(trail=False) +def boost_add_or_update_task( + *, + organization: str, + add_or_update: dict[str, list[str]], + version: str, + extensions: list[str] | None, + user_id: int, +) -> dict[str, Any]: + """ + Run BoostComponentService for each language (same logic as synchronous POST). + + Exceptions propagate so Celery marks the task failed and monitoring can alert. + """ + user = User.objects.get(pk=user_id) + request = AuthenticatedHttpRequest() + request.user = user + + results: dict[str, Any] = {} + for lang_code, submodules in add_or_update.items(): + service = BoostComponentService( + organization=organization, + lang_code=lang_code, + version=version, + extensions=extensions, + ) + results[lang_code] = service.process_all( + submodules, user=user, request=request + ) + return results diff --git a/weblate/boost_endpoint/urls.py b/weblate/boost_endpoint/urls.py old mode 100755 new mode 100644 From a8a9549fb54c991f1231e935e7492e1878d40680 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 11:36:32 -0600 Subject: [PATCH 10/15] Update service file of boost_endpoint. 
--- weblate/boost_endpoint/services.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100755 => 100644 weblate/boost_endpoint/services.py diff --git a/weblate/boost_endpoint/services.py b/weblate/boost_endpoint/services.py old mode 100755 new mode 100644 From bc924d1bbabad0ad593e40f423d9700775f4bd28 Mon Sep 17 00:00:00 2001 From: AuraMindNest <242653549+AuraMindNest@users.noreply.github.com> Date: Thu, 7 May 2026 17:42:35 +0000 Subject: [PATCH 11/15] docs: Documentation snippets update --- weblate/boost_endpoint/tasks.py | 88 ++++++++++++++++----------------- 1 file changed, 43 insertions(+), 45 deletions(-) diff --git a/weblate/boost_endpoint/tasks.py b/weblate/boost_endpoint/tasks.py index 6bf016ee6757..4a149f77b41c 100755 --- a/weblate/boost_endpoint/tasks.py +++ b/weblate/boost_endpoint/tasks.py @@ -1,45 +1,43 @@ -# Copyright © Boost Organization -# -# SPDX-License-Identifier: GPL-3.0-or-later - -"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" - -from __future__ import annotations - -from typing import Any - -from weblate.auth.models import AuthenticatedHttpRequest, User -from weblate.boost_endpoint.services import BoostComponentService -from weblate.utils.celery import app - - -@app.task(trail=False) -def boost_add_or_update_task( - *, - organization: str, - add_or_update: dict[str, list[str]], - version: str, - extensions: list[str] | None, - user_id: int, -) -> dict[str, Any]: - """ - Run BoostComponentService for each language (same logic as synchronous POST). - - Exceptions propagate so Celery marks the task failed and monitoring can alert. 
- """ - user = User.objects.get(pk=user_id) - request = AuthenticatedHttpRequest() - request.user = user - - results: dict[str, Any] = {} - for lang_code, submodules in add_or_update.items(): - service = BoostComponentService( - organization=organization, - lang_code=lang_code, - version=version, - extensions=extensions, - ) - results[lang_code] = service.process_all( - submodules, user=user, request=request - ) - return results +# Copyright © Boost Organization +# +# SPDX-License-Identifier: GPL-3.0-or-later + +"""Celery tasks for Boost documentation add-or-update (async HTTP handling).""" + +from __future__ import annotations + +from typing import Any + +from weblate.auth.models import AuthenticatedHttpRequest, User +from weblate.boost_endpoint.services import BoostComponentService +from weblate.utils.celery import app + + +@app.task(trail=False) +def boost_add_or_update_task( + *, + organization: str, + add_or_update: dict[str, list[str]], + version: str, + extensions: list[str] | None, + user_id: int, +) -> dict[str, Any]: + """ + Run BoostComponentService for each language (same logic as synchronous POST). + + Exceptions propagate so Celery marks the task failed and monitoring can alert. + """ + user = User.objects.get(pk=user_id) + request = AuthenticatedHttpRequest() + request.user = user + + results: dict[str, Any] = {} + for lang_code, submodules in add_or_update.items(): + service = BoostComponentService( + organization=organization, + lang_code=lang_code, + version=version, + extensions=extensions, + ) + results[lang_code] = service.process_all(submodules, user=user, request=request) + return results From e8bb05c589fd5a88ef196df64d92e9b6729c0113 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 14:14:03 -0600 Subject: [PATCH 12/15] Update due to CI fail. 
--- docs/admin/boost-weblate.rst | 31 ++++++++++++++------------ docs/formats/quickbook.rst | 2 +- weblate/boost_endpoint/tasks.py | 0 weblate/boost_endpoint/views.py | 0 weblate/utils/openrouter_translator.py | 4 ++-- 5 files changed, 20 insertions(+), 17 deletions(-) mode change 100755 => 100644 weblate/boost_endpoint/tasks.py mode change 100755 => 100644 weblate/boost_endpoint/views.py diff --git a/docs/admin/boost-weblate.rst b/docs/admin/boost-weblate.rst index e861d487bbb3..b5db99954c5b 100644 --- a/docs/admin/boost-weblate.rst +++ b/docs/admin/boost-weblate.rst @@ -30,7 +30,7 @@ core Weblate dependencies; install it explicitly: pip install 'openai>=2.0,<3.0' If the SDK is missing when OpenRouter translation runs, Weblate raises -:class:`django.core.exceptions.ImproperlyConfigured` with an installation hint. +``django.core.exceptions.ImproperlyConfigured`` with an installation hint. Docker images built from :file:`weblate-docker/Dockerfile` use ``WEBLATE_EXTRAS=all`` so the ``openai`` extra is included in the container. @@ -41,17 +41,20 @@ System commands and packages The following executables must be available on the server **PATH** where the relevant code paths execute (web workers, Celery workers): -================ ================================================================ -Executable Used by -================ ================================================================ -``git`` Boost endpoint service: clone repositories, commit and push - translation changes (:mod:`weblate.boost_endpoint.services`). -``po4a-gettextize``, ``po4a-translate`` - AsciiDoc format pipeline (:mod:`weblate.formats.asciidoc`). -``msgattrib``, ``msgfmt`` - gettext toolchain for AsciiDoc save path; - ``msgattrib`` is optional (the code falls back if absent). -================ ================================================================ +.. 
list-table:: + :header-rows: 1 + :widths: 22 78 + + * - Executable + - Used by + * - ``git`` + - Boost endpoint service: clone repositories, commit and push translation + changes (see ``weblate.boost_endpoint.services``). + * - ``po4a-gettextize``, ``po4a-translate`` + - AsciiDoc format pipeline (``weblate.formats.asciidoc``). + * - ``msgattrib``, ``msgfmt`` + - gettext toolchain for AsciiDoc save path; ``msgattrib`` is optional (the + code falls back if absent). The official Docker image for this fork installs **po4a** from source during the image build (see comments in :file:`weblate-docker/Dockerfile`). Custom or @@ -69,7 +72,7 @@ These variables apply to **Boost fork** behaviour. They do **not** use the API key used when OpenRouter batch translation cannot read credentials from Weblate’s machinery configuration (see :ref:`boost-weblate-openrouter-config`). - Read by :mod:`weblate.trans.autobatchtranslate`. + Read by ``weblate.trans.autobatchtranslate``. .. envvar:: OPENROUTER_MODEL @@ -89,7 +92,7 @@ These variables apply to **Boost fork** behaviour. They do **not** use the Integer seconds to wait when the Boost endpoint waits for a component or translation to become ready before adding a language (polling interval is - derived from this setting in :mod:`weblate.boost_endpoint.services`). + derived from this setting in ``weblate.boost_endpoint.services``). Default in Docker: ``300``. Override per deployment if repositories are slow or fast to sync. diff --git a/docs/formats/quickbook.rst b/docs/formats/quickbook.rst index 5772872a9a7e..aaccfb505857 100644 --- a/docs/formats/quickbook.rst +++ b/docs/formats/quickbook.rst @@ -12,7 +12,7 @@ QuickBook (``.qbk``) is a markup language used in Boost documentation. This Weblate build registers :guilabel:`QuickBook file` as a monolingual :ref:`ConvertFormat ` handler: translatable strings are extracted into gettext PO stores and merged back into QuickBook sources using a built-in parser -(:mod:`weblate.utils.quickbook`). 
+(``weblate.utils.quickbook``). There is **no** external converter binary (such as ``po4a``) required for QuickBook in this fork—only Python dependencies from the main ``weblate`` diff --git a/weblate/boost_endpoint/tasks.py b/weblate/boost_endpoint/tasks.py old mode 100755 new mode 100644 diff --git a/weblate/boost_endpoint/views.py b/weblate/boost_endpoint/views.py old mode 100755 new mode 100644 diff --git a/weblate/utils/openrouter_translator.py b/weblate/utils/openrouter_translator.py index 88726d817e1e..7b44e15a23a9 100644 --- a/weblate/utils/openrouter_translator.py +++ b/weblate/utils/openrouter_translator.py @@ -53,10 +53,10 @@ def __init__( msg = "Model name is required." raise ValueError(msg) - OpenAIClient = _openai_client_factory() + client_cls = _openai_client_factory() # Initialize OpenAI client with OpenRouter endpoint - self.client = OpenAIClient( + self.client = client_cls( base_url="https://openrouter.ai/api/v1", api_key=api_key, timeout=60 * 20, # 20 minutes From fae22d4cbbb5bc9ba2303f78245a48987c0054f3 Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Thu, 7 May 2026 23:44:18 -0600 Subject: [PATCH 13/15] Fix Linkcheck fail. --- docs/admin/machine.rst | 10 +++++----- docs/conf.py | 15 +++++++++++++++ docs/formats.rst | 2 +- docs/formats/laravel.rst | 2 +- docs/user/checks.rst | 2 +- 5 files changed, 23 insertions(+), 8 deletions(-) diff --git a/docs/admin/machine.rst b/docs/admin/machine.rst index 579423def322..c6cd64441433 100644 --- a/docs/admin/machine.rst +++ b/docs/admin/machine.rst @@ -369,15 +369,15 @@ Weblate supports DeepL formality, it will choose matching one based on the language (for example, there is ``de@formal`` and ``de@informal``). The translation context can optionally be specified to improve translations quality. Read more on that in -`DeepL translation context documentation `_. +`DeepL translation context documentation `_. The service automatically uses :ref:`glossary`, see :ref:`glossary-mt`. .. 
seealso:: - * `DeepL translator `_ - * `DeepL pricing `_ - * `DeepL API documentation `_ + * `DeepL translator `_ + * `DeepL pricing `_ + * `DeepL API documentation `_ .. _mt-glosbe: @@ -1076,7 +1076,7 @@ This service uses an API, and you need to obtain an ID and an API key from Youda .. seealso:: - `Youdao Zhiyun Natural Language Translation Service `_ + `Youdao Zhiyun Natural Language Translation Service `_ .. _custom-machinery: diff --git a/docs/conf.py b/docs/conf.py index bf45f9d520bb..7baec900cbf8 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -362,6 +362,18 @@ def setup(app) -> None: # Number of retries and timeout for linkcheck linkcheck_retries = 10 linkcheck_timeout = 10 + +# Treat these redirects as working (sites change paths; --fail-on-warning otherwise). +linkcheck_allowed_redirects = { + ( + r"https://support\.okta\.com/help/s/article/" + r"How-to-send-a-custom-relaystate-to-application-through-idp-initiated-authentication-urls" + ): ( + r"https://support\.okta\.com/help/s/article/" + r"How-to-send-a-custom-relaystate-to-application-through-idp-initiated-authentication-urls(\?.*)?" + ), +} + linkcheck_ignore = [ # Local URL to Weblate "http://127.0.0.1:8080/", @@ -402,6 +414,9 @@ def setup(app) -> None: "https://dev.mysql.com/", # Responds with HTTP 418 I'm a teapot "https://www.freedesktop.org/", + # 403 to automated clients (URLs remain valid in browsers) + "https://mymemory\\.translated\\.net/.*", + "https://docs\\.oasis-open\\.org/.*", ] # HTTP docs diff --git a/docs/formats.rst b/docs/formats.rst index 254ed9dcc2a4..5ac8a13a1e27 100644 --- a/docs/formats.rst +++ b/docs/formats.rst @@ -375,7 +375,7 @@ Translation types capabilities .. [#lp] The plurals are supported only for Laravel which uses in string syntax to define them, see `Localization in Laravel`_. .. [#fp] Plurals are handled in the syntax of the strings and not exposed as plurals in Weblate. -.. _Localization in Laravel: https://laravel.com/docs/localization +.. 
_Localization in Laravel: https://laravel.com/docs/13.x/localization .. _bimono: diff --git a/docs/formats/laravel.rst b/docs/formats/laravel.rst index c3e0f6ce5a9b..54c71277a606 100644 --- a/docs/formats/laravel.rst +++ b/docs/formats/laravel.rst @@ -19,7 +19,7 @@ The Laravel PHP localization files are supported as well with plurals: * :doc:`tt:formats/php` * `Localization in Laravel`_ -.. _Localization in Laravel: https://laravel.com/docs/localization +.. _Localization in Laravel: https://laravel.com/docs/13.x/localization Weblate configuration +++++++++++++++++++++ diff --git a/docs/user/checks.rst b/docs/user/checks.rst index bc8049c2837a..40b695f94de0 100644 --- a/docs/user/checks.rst +++ b/docs/user/checks.rst @@ -742,7 +742,7 @@ Laravel format .. seealso:: * :ref:`check-formats` - * `Laravel translation formatting `_ + * `Laravel translation formatting `_ .. _check-lua-format: From 9d670bfba268e618cfe9255189a73984af22be2b Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Fri, 8 May 2026 05:56:42 -0600 Subject: [PATCH 14/15] Fix Linkcheck fail. --- docs/conf.py | 37 ++++++++++++++++++++++++++++++++++++- 1 file changed, 36 insertions(+), 1 deletion(-) diff --git a/docs/conf.py b/docs/conf.py index 7baec900cbf8..dbb3f2d8d643 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -363,7 +363,9 @@ def setup(app) -> None: linkcheck_retries = 10 linkcheck_timeout = 10 -# Treat these redirects as working (sites change paths; --fail-on-warning otherwise). +# Treat these redirects as working. Linkcheck emits redirects as warnings; CI runs +# sphinx with --fail-on-warning, so expected HTTP redirects must be allowed here +# (or links updated in RST). Patterns use re.match() from the URL start. linkcheck_allowed_redirects = { ( r"https://support\.okta\.com/help/s/article/" @@ -372,6 +374,37 @@ def setup(app) -> None: r"https://support\.okta\.com/help/s/article/" r"How-to-send-a-custom-relaystate-to-application-through-idp-initiated-authentication-urls(\?.*)?" 
), + r"https://docs\.djangoproject\.com/en/stable/.*": ( + r"https://docs\.djangoproject\.com/en/[0-9]+\.[0-9]+/.*" + ), + r"https://weblate\.org/?.*": r"https://weblate\.org/.*", + r"https://docs\.weblate\.org/?.*": r"https://docs\.weblate\.org/.*", + r"https://hosted\.weblate\.org/.*": r"https://hosted\.weblate\.org/.*", + r"https://www\.sphinx-doc\.org/?$": r"https://www\.sphinx-doc\.org/en/master/?", + r"https://angular\.io/.*": r"https://.*\.angular\.io/.*", + r"https://babel\.pocoo\.org/?$": r"https://babel\.pocoo\.org/en/latest/.*", + r"https://cryptography\.io/?$": r"https://cryptography\.io/en/latest/.*", + r"https://docs\.celeryq\.dev/?$": r"https://docs\.celeryq\.dev/en/stable/.*", + r"https://docs\.phpmyadmin\.net/?$": r"https://docs\.phpmyadmin\.net/en/latest/.*", + r"https://doc\.galette\.eu/?$": r"https://doc\.galette\.eu/en/master/.*", + r"https://pytest\.org/?$": r"https://docs\.pytest\.org/.*", + r"https://python-social-auth\.readthedocs\.io/?$": ( + r"https://python-social-auth\.readthedocs\.io/en/latest/.*" + ), + r"https://sentry\.io/?$": r"https://sentry\.io/.*", + r"https://ruby-doc\.org/current/.*": r"https://ruby-doc\.org/[0-9.]+/.*", + r"https://docs\.anthropic\.com/.*": r"https://.*\.claude\.com/.*", + r"https://console\.anthropic\.com/.*": r"https://.*\.claude\.com/.*", + r"https://console\.cloud\.google\.com/.*": r"https://accounts\.google\.com/.*", + r"https://console\.developers\.google\.com/.*": r"https://accounts\.google\.com/.*", + r"https://gitee\.com/help/.*": r"https://help\.gitee\.com.*", + r"https://git\.cloudron\.io/.*": r"https://git\.cloudron\.io/.*", + r"https://github\.com/[^/]+/[^/]+/security/advisories/new(\?.*)?$": ( + r"https://github\.com/login.*" + ), + r"https://www\.bestpractices\.dev/en/projects/[0-9]+/?$": ( + r"https://www\.bestpractices\.dev/en/projects/[0-9]+/passing" + ), } linkcheck_ignore = [ @@ -417,6 +450,8 @@ def setup(app) -> None: # 403 to automated clients (URLs remain valid in browsers) 
"https://mymemory\\.translated\\.net/.*", "https://docs\\.oasis-open\\.org/.*", + # Captcha / bot wall in CI; human documentation links remain valid + "https://cloud\\.yandex\\.com/.*", ] # HTTP docs From 3eabb0fce99340b4c4385db62b2cb1aec8f8696c Mon Sep 17 00:00:00 2001 From: AuraMindNest Date: Fri, 8 May 2026 08:30:06 -0600 Subject: [PATCH 15/15] Fix again --- docs/admin/machine.rst | 2 +- docs/conf.py | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docs/admin/machine.rst b/docs/admin/machine.rst index c6cd64441433..63e305765528 100644 --- a/docs/admin/machine.rst +++ b/docs/admin/machine.rst @@ -622,7 +622,7 @@ You can also specify a custom category to use `custom translator `_ - * `Microsoft Azure Portal `_ + * `Microsoft Azure Portal `_ * `Base URLs `_ * `"Authenticating with a Multi-service resource" `_ * `"Authenticating with an access token" section `_ diff --git a/docs/conf.py b/docs/conf.py index dbb3f2d8d643..3183715ff190 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -361,7 +361,8 @@ def setup(app) -> None: # Number of retries and timeout for linkcheck linkcheck_retries = 10 -linkcheck_timeout = 10 +# Default 10s is tight for some CDNs from GitHub Actions (e.g. contributor-covenant.org). +linkcheck_timeout = 45 # Treat these redirects as working. Linkcheck emits redirects as warnings; CI runs # sphinx with --fail-on-warning, so expected HTTP redirects must be allowed here @@ -379,7 +380,7 @@ def setup(app) -> None: ), r"https://weblate\.org/?.*": r"https://weblate\.org/.*", r"https://docs\.weblate\.org/?.*": r"https://docs\.weblate\.org/.*", - r"https://hosted\.weblate\.org/.*": r"https://hosted\.weblate\.org/.*", + r"https://hosted\.weblate\.org.*": r"https://hosted\.weblate\.org.*", r"https://www\.sphinx-doc\.org/?$": r"https://www\.sphinx-doc\.org/en/master/?", r"https://angular\.io/.*": r"https://.*\.angular\.io/.*", r"https://babel\.pocoo\.org/?$": r"https://babel\.pocoo\.org/en/latest/.*",