From 3055deda65fd921c6b48fe59e1c277d5ec648dac Mon Sep 17 00:00:00 2001 From: ppippi Date: Mon, 11 Aug 2025 00:21:09 +0900 Subject: [PATCH] =?UTF-8?q?=EB=B2=88=EC=97=AD=20workflow=20=EC=9E=90?= =?UTF-8?q?=EB=8F=99=ED=99=94?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/jekyll-docker.yml | 10 +- .github/workflows/translate-to-english.yml | 75 +++++ .../2025-07-03-actions-runner-controller.md | 1 - .../2025-07-03-actions-runner-controller.md | 272 ------------------ scripts/translate_to_en.py | 177 ++++++++++++ 5 files changed, 258 insertions(+), 277 deletions(-) create mode 100644 .github/workflows/translate-to-english.yml delete mode 100644 _posts_en/2025-07-03-actions-runner-controller.md create mode 100644 scripts/translate_to_en.py diff --git a/.github/workflows/jekyll-docker.yml b/.github/workflows/jekyll-docker.yml index 5426821..c35cdc0 100644 --- a/.github/workflows/jekyll-docker.yml +++ b/.github/workflows/jekyll-docker.yml @@ -3,8 +3,9 @@ name: Build and Deploy to GitHub Pages on: push: branches: [ "main" ] - pull_request: - branches: [ "main" ] + workflow_run: + workflows: ["Translate new posts to English"] + types: [completed] # GitHub Pages에 배포하기 위한 권한 설정 permissions: @@ -15,10 +16,11 @@ permissions: # 동시 배포 방지 concurrency: group: "pages" - cancel-in-progress: false + cancel-in-progress: true jobs: build: + if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') }} runs-on: ubuntu-latest steps: @@ -44,7 +46,7 @@ jobs: url: ${{ steps.deployment.outputs.page_url }} runs-on: ubuntu-latest needs: build - if: github.ref == 'refs/heads/main' && github.event_name == 'push' + if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') }} steps: - name: Deploy to GitHub Pages diff --git a/.github/workflows/translate-to-english.yml b/.github/workflows/translate-to-english.yml new file mode 100644 index 0000000..f6c126d --- /dev/null +++ b/.github/workflows/translate-to-english.yml @@ -0,0 +1,75 @@ +name: Translate new posts to English + +on: + pull_request_target: + types: [closed] + +permissions: + contents: write + +jobs: + translate: + if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main' + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v4 + with: + ref: main + + - name: Get changed files from PR + id: changes + uses: actions/github-script@v7 + with: + script: | + const pr = context.payload.pull_request; + const { owner, repo } = context.repo; + const pull_number = pr.number; + const per_page = 100; + let page = 1; + const files = []; + while (true) { + const res = await github.rest.pulls.listFiles({ owner, repo, pull_number, per_page, page }); + if (res.data.length === 0) break; + for (const f of res.data) files.push(f.filename); + if (res.data.length < per_page) break; + page += 1; + } + const posts = files.filter(f => f.startsWith('_posts/') && f.endsWith('.md')); + core.setOutput('posts', posts.join('\n')); + + - name: Set ONLY_FILES env + if: steps.changes.outputs.posts != '' + run: | + echo 'ONLY_FILES<> $GITHUB_ENV + echo '${{ steps.changes.outputs.posts }}' >> $GITHUB_ENV + echo 'EOF' >> $GITHUB_ENV + + - name: Set up Python + if: steps.changes.outputs.posts != '' + uses: actions/setup-python@v5 + with: + python-version: '3.11' + + - name: Install dependencies + if: steps.changes.outputs.posts != '' + run: | + python -m pip install --upgrade pip + pip install openai PyYAML python-frontmatter python-slugify + + - name: Generate English translations + if: steps.changes.outputs.posts != '' + env: + OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} + TRANSLATION_MODEL: ${{ vars.TRANSLATION_MODEL }} + run: | + python scripts/translate_to_en.py + + - name: Commit and push translations + if: steps.changes.outputs.posts != '' + uses: stefanzweifel/git-auto-commit-action@v5 + with: + commit_message: "chore: add English translations for PR #${{ github.event.pull_request.number }}" + file_pattern: "_posts_en/**/*.md" + + diff --git a/_posts/2025-07-03-actions-runner-controller.md b/_posts/2025-07-03-actions-runner-controller.md index d7a918f..6dec20a 100644 --- a/_posts/2025-07-03-actions-runner-controller.md +++ b/_posts/2025-07-03-actions-runner-controller.md @@ -285,4 +285,3 @@ docker run -it \ ARC를 사용하면 GitHub에서 제공하는 Runner를 사용할 때의 비싼 비용 문제와, 직접 VM을 관리하며 Runner를 운영할 때의 비효율성을 모두 해결할 수 있습니다. 특히 GPU가 필요하거나, 복잡한 의존성을 가진 MLOps CI/CD 환경을 구축할 때 ARC는 매우 강력한 도구가 됩니다. 초기 설정 과정이 다소 복잡하게 느껴질 수 있지만, 한번 구축해두면 CI/CD 비용을 크게 절감하고 운영 부담을 덜어주므로 MLOps를 고민하고 있다면 꼭 한번 도입을 검토해보시길 바랍니다. - diff --git a/_posts_en/2025-07-03-actions-runner-controller.md b/_posts_en/2025-07-03-actions-runner-controller.md deleted file mode 100644 index 1a816dd..0000000 --- a/_posts_en/2025-07-03-actions-runner-controller.md +++ /dev/null @@ -1,272 +0,0 @@ ---- -layout: post -title: "Building Actions Runner Controller" -subtitle: "Setting up MLOps CI Environment" -feature-img: "assets/img/2025-07-03/0.png" -tags: [MLOps, Infra] ---- - -### Intro - -Recently, as I've been enjoying AI-based development, I've become even more aware of the importance of testing environments. - -The most representative method would be building CI using GitHub Actions, but in MLOps, high-spec instances are often needed for CI. - -Of course, there are [GPU instances (Linux 4 cores)](https://docs.github.com/en/billing/managing-billing-for-your-products/about-billing-for-github-actions) provided by GitHub Actions, but they are set at a very expensive rate of $0.07 per minute as of now, making them burdensome to use. - -The GPU type is also limited to Nvidia T4 GPU, which has performance constraints as model sizes continue to grow. - -In this situation, self-hosted runners exist as an alternative. - -Literally, it's a method of setting up runners directly and executing GitHub workflows on those runners. - -This method can be set up through GitHub's [Adding self-hosted runners](https://docs.github.com/en/actions/how-tos/hosting-your-own-runners/managing-self-hosted-runners/adding-self-hosted-runners) guide. - -However, this method has the problem of needing to keep CI machines always on (online status). It can be inefficient when CI/CD tasks occur rarely. - -Against this background, **Actions Runner Controller (ARC)** emerges as an excellent alternative. - -[Actions Runner Controller](https://github.com/actions/actions-runner-controller) is an open source that controls GitHub Actions runners to operate in Kubernetes environments. - -Using this, you can test CI using your own Kubernetes resources only when GitHub Actions workflows are executed. - - -### Installing Actions Runner Controller - -The ARC installation process is divided into two main steps: -1. Creating a **GitHub Personal Access Token** for communication and authentication with GitHub -2. **Installing ARC** using Helm and authenticating with the created token - -#### 1. Creating GitHub Personal Access Token - -ARC needs authentication to interact with GitHub API and register and manage runners. For this, we create a GitHub Personal Access Token (PAT). - -* **Path**: `Settings` > `Developer settings` > `Personal access tokens` > `Tokens (classic)` > `Generate new token` - -When creating a Personal Access Token, you need to select [appropriate permissions](https://github.com/actions/actions-runner-controller/blob/master/docs/authenticating-to-the-github-api.md#deploying-using-pat-authentication). (For convenience, grant full permissions) - -> For security, it's recommended to set minimum permissions and key expiration period. - -It seems that GitHub App authentication is recommended over Personal Access Token (PAT) method. - -Keep the created Personal Access Token safe as it will be needed when installing ARC in the next step. - -#### 2. Installing ARC with Helm - -Before installing ARC, cert-manager is required. If cert-manager is not set up in the cluster, install it. - -```bash -kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.8.2/cert-manager.yaml -``` - -Now it's time to install ARC to the Kubernetes cluster using Helm. - -Install ARC using the Personal Access Token created earlier. In the command below, change the `YOUR_GITHUB_TOKEN` value to the PAT value you created earlier. - -```bash -helm repo add actions-runner-controller https://actions-runner-controller.github.io/actions-runner-controller - -helm repo update - -helm pull actions-runner-controller/actions-runner-controller - -tar -zxvf actions-runner-controller-*.tgz - -export GITHUB_TOKEN=YOUR_GITHUB_TOKEN - -helm upgrade --install actions-runner-controller ./actions-runner-controller \ - --namespace actions-runner-system \ - --create-namespace \ - --set authSecret.create=true \ - --set authSecret.github_token="${GITHUB_TOKEN}" -``` - -After installation is complete, check if the ARC controller is running normally with the following command: - -```bash -kubectl get pods -n actions-runner-system -``` - -If the above command executes successfully, you can confirm that the ARC controller manager pod is running in the `actions-runner-system` namespace. - -Now ARC is ready to communicate with GitHub! The next step is to define the runners that will actually execute the workflows. - -### 3. Setting up Runners - -We've installed the ARC controller, but there are no runners yet to execute workflows. Now we need to create runner pods according to GitHub Actions workflow jobs. - -For this, we use two types of resources: -1. `RunnerDeployment`: Serves as a template for runner pods. It defines what container image to use, which GitHub repository to connect to, what labels to have, etc. -2. `HorizontalRunnerAutoscaler` (HRA): Observes the `RunnerDeployment` and automatically adjusts the number of replicas of the `RunnerDeployment` based on the number of queued jobs on GitHub. - -#### Defining RunnerDeployment - -First, create a file named `runner-deployment.yml` as shown below. Change the `spec.template.spec.repository` value to your GitHub repository name. - -> You can specify not only repository but also organization if you have permissions. - -```yaml -apiVersion: actions.summerwind.dev/v1alpha1 -kind: RunnerDeployment -metadata: - name: example-runner-deployment - namespace: actions-runner-system -spec: - replicas: 1 - template: - spec: - repository: / - labels: - - self-hosted - - arc-runner -``` - -With this configuration, you can check the GitHub Repo Actions self-hosted runner. - - - -After deployment is complete, you can confirm that a new runner with `self-hosted` and `arc-runner` labels is registered in the **Settings > Actions > Runners** tab of your GitHub repository. - -#### Defining HorizontalRunnerAutoscaler - -Next, define an HRA to automatically scale the `RunnerDeployment` created above. Create an `hra.yml` file. - -```yaml -apiVersion: actions.summerwind.dev/v1alpha1 -kind: HorizontalRunnerAutoscaler -metadata: - name: example-hra - namespace: actions-runner-system -spec: - scaleTargetRef: - name: example-runner-deployment - minReplicas: 0 - maxReplicas: 5 -``` - -By specifying minReplicas and maxReplicas, you can scale up and down according to resources. - -Or you can specify additional metrics to create pods whenever there's a workflow trigger. Various other metrics exist besides this. - -> When HorizontalRunnerAutoscaler is configured, runners are created only when needed, so normally (when there are 0 runners) you cannot see runners in the GitHub UI. - - - -```yaml -apiVersion: actions.summerwind.dev/v1alpha1 -kind: HorizontalRunnerAutoscaler -metadata: - name: example-hra - namespace: actions-runner-system -spec: - scaleTargetRef: - name: example-runner-deployment - minReplicas: 0 - maxReplicas: 5 - metrics: - - type: TotalNumberOfQueuedAndInProgressWorkflowRuns - repositoryNames: ["/"] -``` - -This is my most preferred metric, which scales up when workflow execution is needed (when in Queue status). -Like this, you can specify metrics according to your needs to achieve good results. - -### 4. Using in GitHub Actions Workflows - -Now all settings are complete! Using the newly created ARC runner is very simple. Just put the labels specified in the `RunnerDeployment` in the `runs-on` key in your workflow file. - -Let's add a simple test workflow (`test-arc.yml`) to the `.github/workflows/` directory of your repository as shown below. - -```yaml -name: ARC Runner Test - -on: - push: - branches: - - main - -jobs: - test-job: - runs-on: [self-hosted, arc-runner] - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Test - run: | - echo "Hello from an ARC runner!" - echo "This runner is running inside a Kubernetes pod." - sleep 10 -``` - -The `runs-on: [self-hosted, arc-runner]` part is key. When this workflow runs, GitHub assigns the job to a runner that has both `self-hosted` and `arc-runner` labels. ARC detects this event and, if needed according to HRA settings, creates new runner pods to process the job. - -> When configured as self-hosted, unlike GitHub's default runners, you may need to install some packages in the workflow. - -### Troubleshooting Records - -One common problem when using Docker frequently for CI/CD is the DinD (Docker in Docker) issue. - -In ARC's case, by default, a runner scheduling container and a docker daemon container come up together in a sidecar structure. - -To solve such cases, there are docker images that support DinD. - -Like the following yaml file, by specifying image and dockerdWithinRunnerContainer, the docker daemon runs inside the runner and the workflow executes on that runner. - -```yaml -apiVersion: actions.summerwind.dev/v1alpha1 -kind: RunnerDeployment -metadata: - name: example-runner-deployment - namespace: actions-runner-system -spec: - replicas: 1 - template: - spec: - repository: / - labels: - - self-hosted - - arc-runner - image: "summerwind/actions-runner-dind:latest" - dockerdWithinRunnerContainer: true -``` - -Especially for docker tests that require GPU, using the above DinD image on a cluster with NVIDIA Container Toolkit installed allows GPU recognition. - -If you configure as shown below in the workflow you want to run, you can confirm that GPU is properly set up even in DinD situations. -(NVIDIA Container Toolkit and NVIDIA GPU Driver Plugin version check is necessary!) - -```bash -# Check GPU devices -ls -la /dev/nvidia* - -# device library setup -smi_path=$(find / -name "nvidia-smi" 2>/dev/null | head -n 1) -lib_path=$(find / -name "libnvidia-ml.so" 2>/dev/null | head -n 1) -lib_dir=$(dirname "$lib_path") -export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(dirname "$lib_path") -export NVIDIA_VISIBLE_DEVICES=all -export NVIDIA_DRIVER_CAPABILITIES=compute,utility - -# Direct GPU device and library mount without nvidia runtime -docker run -it \ - --device=/dev/nvidia0:/dev/nvidia0 \ - --device=/dev/nvidiactl:/dev/nvidiactl \ - --device=/dev/nvidia-uvm:/dev/nvidia-uvm \ - --device=/dev/nvidia-uvm-tools:/dev/nvidia-uvm-tools \ - -v "$lib_dir:$lib_dir:ro" \ - -v "$(dirname $smi_path):$(dirname $smi_path):ro" \ - -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" \ - -e NVIDIA_VISIBLE_DEVICES="$NVIDIA_VISIBLE_DEVICES" \ - -e NVIDIA_DRIVER_CAPABILITIES="$NVIDIA_DRIVER_CAPABILITIES" \ - pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime -``` - -### Conclusion - -We've explored how to build Actions Runner Controller in a Kubernetes environment to create a dynamically scalable self-hosted runner environment. - -Using ARC can solve both the expensive cost problem when using GitHub-provided runners and the inefficiency when directly managing VMs to operate runners. Especially when building MLOps CI/CD environments that require GPU or have complex dependencies, ARC becomes a very powerful tool. - -The initial setup process may feel somewhat complex, but once built, it significantly reduces CI/CD costs and operational burden, so if you're considering MLOps, I highly recommend considering its adoption. - diff --git a/scripts/translate_to_en.py b/scripts/translate_to_en.py new file mode 100644 index 0000000..a4515d3 --- /dev/null +++ b/scripts/translate_to_en.py @@ -0,0 +1,177 @@ +#!/usr/bin/env python3 +import os +import re +import argparse +import sys +from pathlib import Path + +import frontmatter +import yaml +from slugify import slugify + +try: + from openai import OpenAI +except Exception: + print("[ERROR] openai package not available. Make sure it's installed.") + sys.exit(1) + + +REPO_ROOT = Path(__file__).resolve().parent.parent +POSTS_DIR = REPO_ROOT / "_posts" +POSTS_EN_DIR = REPO_ROOT / "_posts_en" + + +PROMPT_SYSTEM = ( + "You are an expert technical writer and translator. Translate the given Korean Jekyll blog post content into natural, concise American English.\n" + "- Preserve front matter keys; translate 'title' and any 'description' or 'summary' fields to English.\n" + "- Keep Markdown structure, headings, code blocks, links, images, and footnotes intact.\n" + "- Do not hallucinate code or change code semantics.\n" + "- Maintain YAML front matter formatting.\n" + "- If there is mixed language, prefer English.\n" +) + + +def normalize_repo_path(path_like: str | Path) -> Path: + p = Path(path_like) + return (REPO_ROOT / p).resolve() if not p.is_absolute() else p.resolve() + + +def load_korean_posts(only_paths: list[Path] | None = None) -> list[Path]: + if only_paths: + results: list[Path] = [] + for p in only_paths: + abs_p = normalize_repo_path(p) + try: + abs_p.relative_to(POSTS_DIR) + except ValueError: + # skip files outside _posts + continue + if abs_p.exists() and abs_p.suffix == ".md": + results.append(abs_p) + # de-duplicate and sort + return sorted({p for p in results}) + return sorted(POSTS_DIR.glob("*.md")) + + +def to_en_filename(source_path: Path, en_title: str) -> Path: + # source: YYYY-MM-DD-title.md -> keep date, replace slug with English slug + date_prefix = source_path.name.split("-", 3)[:3] + # Build english slug from translated title + en_slug = slugify(en_title) + filename = f"{date_prefix[0]}-{date_prefix[1]}-{date_prefix[2]}-{en_slug}.md" + return POSTS_EN_DIR / filename + + +def needs_translation(src_file: Path) -> bool: + try: + post = frontmatter.load(src_file) + except Exception: + return True + title = str(post.get("title", "")) + # If corresponding EN file already exists, skip + en_filename = to_en_filename(src_file, title or src_file.stem) + if en_filename.exists(): + return False + # If any file on the same date exists in _posts_en, we might have different slug; check by date + date_prefix = "-".join(src_file.name.split("-", 3)[:3]) + matches = list(POSTS_EN_DIR.glob(f"{date_prefix}-*.md")) + return len(matches) == 0 + + +def build_prompt(post: frontmatter.Post) -> str: + fm_yaml = yaml.safe_dump(dict(post), allow_unicode=True, sort_keys=False).strip() + content = post.content + return ( + "Translate the following Jekyll Markdown post to English.\n\n" + "---\n" + fm_yaml + "\n---\n\n" + content + ) + + +def call_openai(model: str, system_prompt: str, user_prompt: str) -> str: + api_key = os.environ.get("OPENAI_API_KEY") + if not api_key: + print("[ERROR] OPENAI_API_KEY is not set. Skipping translation.") + sys.exit(2) + client = OpenAI(api_key=api_key) + resp = client.chat.completions.create( + model=model, + messages=[ + {"role": "system", "content": system_prompt}, + {"role": "user", "content": user_prompt}, + ], + temperature=0.3, + ) + return resp.choices[0].message.content + + +def split_front_matter(translated_markdown: str) -> tuple[dict, str]: + # Expect YAML front matter at the top + m = re.match(r"^---\n([\s\S]+?)\n---\n?([\s\S]*)$", translated_markdown.strip()) + if not m: + return {}, translated_markdown + fm_text, body = m.group(1), m.group(2) + try: + fm = yaml.safe_load(fm_text) or {} + if not isinstance(fm, dict): + fm = {} + except Exception: + fm = {} + return fm, body + + +def ensure_posts_en_dir(): + POSTS_EN_DIR.mkdir(parents=True, exist_ok=True) + + +def main() -> int: + parser = argparse.ArgumentParser( + description="Translate Korean _posts to English _posts_en" + ) + parser.add_argument( + "--only", nargs="*", help="Only translate these paths under _posts/" + ) + args = parser.parse_args() + + ensure_posts_en_dir() + model = os.environ.get("TRANSLATION_MODEL", "gpt-5.1-mini") + only_files: list[str] = [] + if args.only: + only_files.extend(args.only) + env_only = os.environ.get("ONLY_FILES") + if env_only: + only_files.extend( + [line.strip() for line in env_only.splitlines() if line.strip()] + ) + + only_paths = [Path(p) for p in only_files] if only_files else None + + created = 0 + force = only_paths is not None + for src in load_korean_posts(only_paths): + if not force and not needs_translation(src): + continue + post = frontmatter.load(src) + prompt = build_prompt(post) + translated = call_openai(model, PROMPT_SYSTEM, prompt) + fm, body = split_front_matter(translated) + + # Fallbacks if model didn't preserve FM properly + if not fm: + fm = dict(post) + if "title" not in fm or not fm["title"]: + fm["title"] = dict(post).get("title", "") + + # Construct english filename from translated title + en_path = to_en_filename(src, str(fm.get("title", src.stem))) + en_post = frontmatter.Post(body, **fm) + with open(en_path, "w", encoding="utf-8") as f: + f.write(frontmatter.dumps(en_post)) + print(f"[translate] Created: {en_path.relative_to(REPO_ROOT)}") + created += 1 + + print(f"[translate] New English posts: {created}") + return 0 + + +if __name__ == "__main__": + raise SystemExit(main())