From 3055deda65fd921c6b48fe59e1c277d5ec648dac Mon Sep 17 00:00:00 2001
From: ppippi <wjdqlsdlsp@naver.com>
Date: Mon, 11 Aug 2025 00:21:09 +0900
Subject: [PATCH] =?UTF-8?q?=EB=B2=88=EC=97=AD=20workflow=20=EC=9E=90?=
 =?UTF-8?q?=EB=8F=99=ED=99=94?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .github/workflows/jekyll-docker.yml           |  10 +-
 .github/workflows/translate-to-english.yml    |  75 +++++
 .../2025-07-03-actions-runner-controller.md   |   1 -
 .../2025-07-03-actions-runner-controller.md   | 272 ------------------
 scripts/translate_to_en.py                    | 177 ++++++++++++
 5 files changed, 258 insertions(+), 277 deletions(-)
 create mode 100644 .github/workflows/translate-to-english.yml
 delete mode 100644 _posts_en/2025-07-03-actions-runner-controller.md
 create mode 100644 scripts/translate_to_en.py

diff --git a/.github/workflows/jekyll-docker.yml b/.github/workflows/jekyll-docker.yml
index 5426821..c35cdc0 100644
--- a/.github/workflows/jekyll-docker.yml
+++ b/.github/workflows/jekyll-docker.yml
@@ -3,8 +3,9 @@ name: Build and Deploy to GitHub Pages
 on:
   push:
     branches: [ "main" ]
-  pull_request:
-    branches: [ "main" ]
+  workflow_run:
+    workflows: ["Translate new posts to English"]  # must match the name: of translate-to-english.yml
+    types: [completed]  # fires whatever the conclusion; the jobs below filter on success
 
 # GitHub Pages에 배포하기 위한 권한 설정
 permissions:
@@ -15,10 +16,11 @@ permissions:
 # 동시 배포 방지
 concurrency:
   group: "pages"
-  cancel-in-progress: false
+  cancel-in-progress: true
 
 jobs:
   build:
+    if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') }}  # pushes, or a translation run that succeeded
     runs-on: ubuntu-latest
     
     steps:
@@ -44,7 +46,7 @@ jobs:
       url: ${{ steps.deployment.outputs.page_url }}
     runs-on: ubuntu-latest
     needs: build
-    if: github.ref == 'refs/heads/main' && github.event_name == 'push'
+    if: ${{ github.event_name == 'push' || (github.event_name == 'workflow_run' && github.event.workflow_run.conclusion == 'success') }}  # same guard as the build job
     
     steps:
     - name: Deploy to GitHub Pages
diff --git a/.github/workflows/translate-to-english.yml b/.github/workflows/translate-to-english.yml
new file mode 100644
index 0000000..f6c126d
--- /dev/null
+++ b/.github/workflows/translate-to-english.yml
@@ -0,0 +1,75 @@
+name: Translate new posts to English  # jekyll-docker.yml's workflow_run trigger refers to this exact name
+
+on:
+  pull_request_target:
+    types: [closed]  # fires for every closed PR; the job-level if keeps merged ones only
+
+permissions:
+  contents: write  # the final step commits and pushes the generated files
+
+jobs:
+  translate:
+    if: github.event.pull_request.merged == true && github.event.pull_request.base.ref == 'main'
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout main
+        uses: actions/checkout@v4
+        with:
+          ref: main  # check out main itself rather than the PR head
+
+      - name: Get changed files from PR
+        id: changes
+        uses: actions/github-script@v7
+        with:
+          script: |
+            const pr = context.payload.pull_request;
+            const { owner, repo } = context.repo;
+            const pull_number = pr.number;
+            const per_page = 100;
+            let page = 1;
+            const files = [];
+            while (true) {  // page through the PR's file list
+              const res = await github.rest.pulls.listFiles({ owner, repo, pull_number, per_page, page });
+              if (res.data.length === 0) break;
+              for (const f of res.data) files.push(f.filename);
+              if (res.data.length < per_page) break;  // a short page is the last one
+              page += 1;
+            }
+            const posts = files.filter(f => f.startsWith('_posts/') && f.endsWith('.md'));
+            core.setOutput('posts', posts.join('\n'));  // newline-separated; empty when no post changed
+
+      - name: Set ONLY_FILES env
+        if: steps.changes.outputs.posts != ''
+        env:  # hand the file names over via env so they are never spliced into the shell script
+          POSTS: ${{ steps.changes.outputs.posts }}
+        run: |
+          printf 'ONLY_FILES<<ONLY_FILES_EOF\n%s\nONLY_FILES_EOF\n' "$POSTS" >> "$GITHUB_ENV"
+
+      - name: Set up Python
+        if: steps.changes.outputs.posts != ''  # this and every later step is skipped when the PR touched no post
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Install dependencies
+        if: steps.changes.outputs.posts != ''
+        run: |
+          python -m pip install --upgrade pip
+          pip install openai PyYAML python-frontmatter python-slugify
+
+      - name: Generate English translations
+        if: steps.changes.outputs.posts != ''
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          TRANSLATION_MODEL: ${{ vars.TRANSLATION_MODEL }}  # NOTE(review): presumably an empty string when the variable is unset - confirm the script copes
+        run: |
+          python scripts/translate_to_en.py
+
+      - name: Commit and push translations
+        if: steps.changes.outputs.posts != ''
+        uses: stefanzweifel/git-auto-commit-action@v5
+        with:
+          commit_message: "chore: add English translations for PR #${{ github.event.pull_request.number }}"
+          file_pattern: "_posts_en/**/*.md"  # restricts the commit to Markdown under _posts_en
+
+
diff --git a/_posts/2025-07-03-actions-runner-controller.md b/_posts/2025-07-03-actions-runner-controller.md
index d7a918f..6dec20a 100644
--- a/_posts/2025-07-03-actions-runner-controller.md
+++ b/_posts/2025-07-03-actions-runner-controller.md
@@ -285,4 +285,3 @@ docker run -it \
 ARC를 사용하면 GitHub에서 제공하는 Runner를 사용할 때의 비싼 비용 문제와, 직접 VM을 관리하며 Runner를 운영할 때의 비효율성을 모두 해결할 수 있습니다. 특히 GPU가 필요하거나, 복잡한 의존성을 가진 MLOps CI/CD 환경을 구축할 때 ARC는 매우 강력한 도구가 됩니다.
 
 초기 설정 과정이 다소 복잡하게 느껴질 수 있지만, 한번 구축해두면 CI/CD 비용을 크게 절감하고 운영 부담을 덜어주므로 MLOps를 고민하고 있다면 꼭 한번 도입을 검토해보시길 바랍니다.
-
diff --git a/_posts_en/2025-07-03-actions-runner-controller.md b/_posts_en/2025-07-03-actions-runner-controller.md
deleted file mode 100644
index 1a816dd..0000000
--- a/_posts_en/2025-07-03-actions-runner-controller.md
+++ /dev/null
@@ -1,272 +0,0 @@
----
-layout: post
-title: "Building Actions Runner Controller"
-subtitle: "Setting up MLOps CI Environment"
-feature-img: "assets/img/2025-07-03/0.png"
-tags: [MLOps, Infra]
----
-
-### Intro
-
-Recently, as I've been enjoying AI-based development, I've become even more aware of the importance of testing environments.
-
-The most representative method would be building CI using GitHub Actions, but in MLOps, high-spec instances are often needed for CI.
-
-Of course, there are [GPU instances (Linux 4 cores)](https://docs.github.com/en/billing/managing-billing-for-your-products/about-billing-for-github-actions) provided by GitHub Actions, but they are set at a very expensive rate of $0.07 per minute as of now, making them burdensome to use.
-
-The GPU type is also limited to Nvidia T4 GPU, which has performance constraints as model sizes continue to grow.
-
-In this situation, self-hosted runners exist as an alternative.
-
-Literally, it's a method of setting up runners directly and executing GitHub workflows on those runners.
-
-This method can be set up through GitHub's [Adding self-hosted runners](https://docs.github.com/en/actions/how-tos/hosting-your-own-runners/managing-self-hosted-runners/adding-self-hosted-runners) guide.
-
-However, this method has the problem of needing to keep CI machines always on (online status). It can be inefficient when CI/CD tasks occur rarely.
-
-Against this background, **Actions Runner Controller (ARC)** emerges as an excellent alternative.
-
-[Actions Runner Controller](https://github.com/actions/actions-runner-controller) is an open source that controls GitHub Actions runners to operate in Kubernetes environments.
-
-Using this, you can test CI using your own Kubernetes resources only when GitHub Actions workflows are executed.
-
-
-### Installing Actions Runner Controller
-
-The ARC installation process is divided into two main steps:
-1. Creating a **GitHub Personal Access Token** for communication and authentication with GitHub
-2. **Installing ARC** using Helm and authenticating with the created token
-
-#### 1. Creating GitHub Personal Access Token
-
-ARC needs authentication to interact with GitHub API and register and manage runners. For this, we create a GitHub Personal Access Token (PAT).
-
-* **Path**: `Settings` > `Developer settings` > `Personal access tokens` > `Tokens (classic)` > `Generate new token`
-
-When creating a Personal Access Token, you need to select [appropriate permissions](https://github.com/actions/actions-runner-controller/blob/master/docs/authenticating-to-the-github-api.md#deploying-using-pat-authentication). (For convenience, grant full permissions)
-
-> For security, it's recommended to set minimum permissions and key expiration period.
-
-It seems that GitHub App authentication is recommended over Personal Access Token (PAT) method.
-
-Keep the created Personal Access Token safe as it will be needed when installing ARC in the next step.
-
-#### 2. Installing ARC with Helm
-
-Before installing ARC, cert-manager is required. If cert-manager is not set up in the cluster, install it.
-
-```bash
-kubectl apply -f https://github.com/cert-manager/cert-manager/releases/download/v1.8.2/cert-manager.yaml
-```
-
-Now it's time to install ARC to the Kubernetes cluster using Helm.
-
-Install ARC using the Personal Access Token created earlier. In the command below, change the `YOUR_GITHUB_TOKEN` value to the PAT value you created earlier.
-
-```bash
-helm repo add actions-runner-controller https://actions-runner-controller.github.io/actions-runner-controller
-
-helm repo update
-
-helm pull actions-runner-controller/actions-runner-controller
-
-tar -zxvf actions-runner-controller-*.tgz
-
-export GITHUB_TOKEN=YOUR_GITHUB_TOKEN
-
-helm upgrade --install actions-runner-controller ./actions-runner-controller \
-  --namespace actions-runner-system \
-  --create-namespace \
-  --set authSecret.create=true \
-  --set authSecret.github_token="${GITHUB_TOKEN}"
-```
-
-After installation is complete, check if the ARC controller is running normally with the following command:
-
-```bash
-kubectl get pods -n actions-runner-system
-```
-
-If the above command executes successfully, you can confirm that the ARC controller manager pod is running in the `actions-runner-system` namespace.
-
-Now ARC is ready to communicate with GitHub! The next step is to define the runners that will actually execute the workflows.
-
-### 3. Setting up Runners
-
-We've installed the ARC controller, but there are no runners yet to execute workflows. Now we need to create runner pods according to GitHub Actions workflow jobs.
-
-For this, we use two types of resources:
-1. `RunnerDeployment`: Serves as a template for runner pods. It defines what container image to use, which GitHub repository to connect to, what labels to have, etc.
-2. `HorizontalRunnerAutoscaler` (HRA): Observes the `RunnerDeployment` and automatically adjusts the number of replicas of the `RunnerDeployment` based on the number of queued jobs on GitHub.
-
-#### Defining RunnerDeployment
-
-First, create a file named `runner-deployment.yml` as shown below. Change the `spec.template.spec.repository` value to your GitHub repository name.
-
-> You can specify not only repository but also organization if you have permissions.
-
-```yaml
-apiVersion: actions.summerwind.dev/v1alpha1
-kind: RunnerDeployment
-metadata:
-  name: example-runner-deployment
-  namespace: actions-runner-system
-spec:
-  replicas: 1
-  template:
-    spec:
-      repository: <YOUR_NAME>/<YOUR_REPO_NAME>
-      labels:
-        - self-hosted
-        - arc-runner
-```
-
-With this configuration, you can check the GitHub Repo Actions self-hosted runner.
-
-<img src="/assets/img/2025-07-03/1.png">
-
-After deployment is complete, you can confirm that a new runner with `self-hosted` and `arc-runner` labels is registered in the **Settings > Actions > Runners** tab of your GitHub repository.
-
-#### Defining HorizontalRunnerAutoscaler
-
-Next, define an HRA to automatically scale the `RunnerDeployment` created above. Create an `hra.yml` file.
-
-```yaml
-apiVersion: actions.summerwind.dev/v1alpha1
-kind: HorizontalRunnerAutoscaler
-metadata:
-  name: example-hra
-  namespace: actions-runner-system
-spec:
-  scaleTargetRef:
-    name: example-runner-deployment
-  minReplicas: 0
-  maxReplicas: 5
-```
-
-By specifying minReplicas and maxReplicas, you can scale up and down according to resources.
-
-Or you can specify additional metrics to create pods whenever there's a workflow trigger. Various other metrics exist besides this.
-
-> When HorizontalRunnerAutoscaler is configured, runners are created only when needed, so normally (when there are 0 runners) you cannot see runners in the GitHub UI.
-
-<img src="/assets/img/2025-07-03/2.png">
-
-```yaml
-apiVersion: actions.summerwind.dev/v1alpha1
-kind: HorizontalRunnerAutoscaler
-metadata:
-  name: example-hra
-  namespace: actions-runner-system
-spec:
-  scaleTargetRef:
-    name: example-runner-deployment
-  minReplicas: 0
-  maxReplicas: 5
-  metrics:
-  - type: TotalNumberOfQueuedAndInProgressWorkflowRuns
-    repositoryNames: ["<YOUR_NAME>/<YOUR_REPO_NAME>"]
-```
-
-This is my most preferred metric, which scales up when workflow execution is needed (when in Queue status).
-Like this, you can specify metrics according to your needs to achieve good results.
-
-### 4. Using in GitHub Actions Workflows
-
-Now all settings are complete! Using the newly created ARC runner is very simple. Just put the labels specified in the `RunnerDeployment` in the `runs-on` key in your workflow file.
-
-Let's add a simple test workflow (`test-arc.yml`) to the `.github/workflows/` directory of your repository as shown below.
-
-```yaml
-name: ARC Runner Test
-
-on:
-  push:
-    branches:
-      - main
-
-jobs:
-  test-job:
-    runs-on: [self-hosted, arc-runner]
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v3
-
-      - name: Test
-        run: |
-          echo "Hello from an ARC runner!"
-          echo "This runner is running inside a Kubernetes pod."
-          sleep 10
-```
-
-The `runs-on: [self-hosted, arc-runner]` part is key. When this workflow runs, GitHub assigns the job to a runner that has both `self-hosted` and `arc-runner` labels. ARC detects this event and, if needed according to HRA settings, creates new runner pods to process the job.
-
-> When configured as self-hosted, unlike GitHub's default runners, you may need to install some packages in the workflow.
-
-### Troubleshooting Records
-
-One common problem when using Docker frequently for CI/CD is the DinD (Docker in Docker) issue.
-
-In ARC's case, by default, a runner scheduling container and a docker daemon container come up together in a sidecar structure.
-
-To solve such cases, there are docker images that support DinD.
-
-Like the following yaml file, by specifying image and dockerdWithinRunnerContainer, the docker daemon runs inside the runner and the workflow executes on that runner.
-
-```yaml
-apiVersion: actions.summerwind.dev/v1alpha1
-kind: RunnerDeployment
-metadata:
-  name: example-runner-deployment
-  namespace: actions-runner-system
-spec:
-  replicas: 1
-  template:
-    spec:
-      repository: <YOUR_NAME>/<YOUR_REPO_NAME>
-      labels:
-        - self-hosted
-        - arc-runner
-      image: "summerwind/actions-runner-dind:latest"
-      dockerdWithinRunnerContainer: true
-```
-
-Especially for docker tests that require GPU, using the above DinD image on a cluster with NVIDIA Container Toolkit installed allows GPU recognition.
-
-If you configure as shown below in the workflow you want to run, you can confirm that GPU is properly set up even in DinD situations.
-(NVIDIA Container Toolkit and NVIDIA GPU Driver Plugin version check is necessary!)
-
-```bash
-# Check GPU devices
-ls -la /dev/nvidia*
-
-# device library setup
-smi_path=$(find / -name "nvidia-smi" 2>/dev/null | head -n 1)
-lib_path=$(find / -name "libnvidia-ml.so" 2>/dev/null | head -n 1)
-lib_dir=$(dirname "$lib_path")
-export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$(dirname "$lib_path")
-export NVIDIA_VISIBLE_DEVICES=all
-export NVIDIA_DRIVER_CAPABILITIES=compute,utility
-
-# Direct GPU device and library mount without nvidia runtime
-docker run -it \
-  --device=/dev/nvidia0:/dev/nvidia0 \
-  --device=/dev/nvidiactl:/dev/nvidiactl \
-  --device=/dev/nvidia-uvm:/dev/nvidia-uvm \
-  --device=/dev/nvidia-uvm-tools:/dev/nvidia-uvm-tools \
-  -v "$lib_dir:$lib_dir:ro" \
-  -v "$(dirname $smi_path):$(dirname $smi_path):ro" \
-  -e LD_LIBRARY_PATH="$LD_LIBRARY_PATH" \
-  -e NVIDIA_VISIBLE_DEVICES="$NVIDIA_VISIBLE_DEVICES" \
-  -e NVIDIA_DRIVER_CAPABILITIES="$NVIDIA_DRIVER_CAPABILITIES" \
-  pytorch/pytorch:2.6.0-cuda12.4-cudnn9-runtime
-```
-
-### Conclusion
-
-We've explored how to build Actions Runner Controller in a Kubernetes environment to create a dynamically scalable self-hosted runner environment.
-
-Using ARC can solve both the expensive cost problem when using GitHub-provided runners and the inefficiency when directly managing VMs to operate runners. Especially when building MLOps CI/CD environments that require GPU or have complex dependencies, ARC becomes a very powerful tool.
-
-The initial setup process may feel somewhat complex, but once built, it significantly reduces CI/CD costs and operational burden, so if you're considering MLOps, I highly recommend considering its adoption.
-
diff --git a/scripts/translate_to_en.py b/scripts/translate_to_en.py
new file mode 100644
index 0000000..a4515d3
--- /dev/null
+++ b/scripts/translate_to_en.py
@@ -0,0 +1,177 @@
+#!/usr/bin/env python3
+import os
+import re
+import argparse
+import sys
+from pathlib import Path
+
+import frontmatter
+import yaml
+from slugify import slugify
+
+try:
+    from openai import OpenAI
+except ImportError:  # only a missing package; any other import-time failure should surface as-is
+    print("[ERROR] openai package not available. Make sure it's installed.", file=sys.stderr)
+    sys.exit(1)
+
+
+REPO_ROOT = Path(__file__).resolve().parent.parent  # this file lives in scripts/, one level below the root
+POSTS_DIR = REPO_ROOT / "_posts"  # Korean source posts
+POSTS_EN_DIR = REPO_ROOT / "_posts_en"  # English posts written by this script
+
+
+PROMPT_SYSTEM = (  # passed as the system message of every translation request
+    "You are an expert technical writer and translator. Translate the given Korean Jekyll blog post content into natural, concise American English.\n"
+    "- Preserve front matter keys; translate 'title' and any 'description' or 'summary' fields to English.\n"
+    "- Keep Markdown structure, headings, code blocks, links, images, and footnotes intact.\n"
+    "- Do not hallucinate code or change code semantics.\n"
+    "- Maintain YAML front matter formatting.\n"
+    "- If there is mixed language, prefer English.\n"
+)
+
+
+def normalize_repo_path(path_like: str | Path) -> Path:
+    """Return path_like resolved to an absolute path; relative input is anchored at REPO_ROOT."""
+    return (REPO_ROOT / path_like).resolve()  # "/" keeps the right-hand side when it is absolute
+
+
+def load_korean_posts(only_paths: list[Path] | None = None) -> list[Path]:
+    """Return the source posts to process, sorted and de-duplicated.
+
+    Without only_paths this is every *.md directly under POSTS_DIR; otherwise
+    only the given paths that exist, end in .md and lie under POSTS_DIR.
+    """
+    if not only_paths:
+        return sorted(POSTS_DIR.glob("*.md"))
+    selected: set[Path] = set()
+    for candidate in only_paths:
+        resolved = normalize_repo_path(candidate)
+        inside_posts = resolved.is_relative_to(POSTS_DIR)
+        if inside_posts and resolved.exists() and resolved.suffix == ".md":
+            selected.add(resolved)
+    return sorted(selected)
+
+
+def to_en_filename(source_path: Path, en_title: str) -> Path:
+    """Return the _posts_en path: the source's YYYY-MM-DD prefix plus a slug of en_title."""
+    # source: YYYY-MM-DD-title.md -> keep the date, replace the slug with the English one
+    year, month, day, *rest = source_path.stem.split("-", 3)
+    # An empty title slugifies to ""; fall back to the source slug instead of "YYYY-MM-DD-.md"
+    en_slug = slugify(en_title) or slugify("".join(rest)) or "post"
+    return POSTS_EN_DIR / f"{year}-{month}-{day}-{en_slug}.md"
+
+
+def needs_translation(src_file: Path) -> bool:
+    """Return True if src_file still needs an English translation.
+
+    True when the post cannot be parsed, or when _posts_en has neither the expected
+    file nor any file with the same date prefix (the English slug may differ).
+    """
+    try:
+        source_post = frontmatter.load(src_file)
+    except Exception:
+        return True
+    source_title = str(source_post.get("title", "")) or src_file.stem
+    expected = to_en_filename(src_file, source_title)
+    day_prefix = "-".join(src_file.name.split("-", 3)[:3])
+    return not (expected.exists() or any(POSTS_EN_DIR.glob(f"{day_prefix}-*.md")))
+
+
+def build_prompt(post: frontmatter.Post) -> str:
+    """Return the user message: an instruction line, then the post as front matter plus body."""
+    # sort_keys=False keeps the author's key order; allow_unicode leaves non-ASCII text unescaped.
+    front_matter = yaml.safe_dump(dict(post), allow_unicode=True, sort_keys=False).strip()
+    header = "Translate the following Jekyll Markdown post to English.\n\n"
+    document = "---\n" + front_matter + "\n---\n\n" + post.content
+    return header + document
+
+
+def call_openai(model: str, system_prompt: str, user_prompt: str) -> str:
+    """Return the model's reply text; exit 2 without an API key, raise RuntimeError on an empty reply."""
+    api_key = os.environ.get("OPENAI_API_KEY")
+    if not api_key:
+        print("[ERROR] OPENAI_API_KEY is not set. Skipping translation.")
+        sys.exit(2)
+    messages = [
+        {"role": "system", "content": system_prompt},
+        {"role": "user", "content": user_prompt},
+    ]
+    resp = OpenAI(api_key=api_key).chat.completions.create(model=model, messages=messages, temperature=0.3)
+    reply = resp.choices[0].message.content  # None when the model returns no text
+    if not reply:
+        raise RuntimeError(f"{model} returned an empty translation")
+    return reply
+
+
+def split_front_matter(translated_markdown: str) -> tuple[dict, str]:
+    """Split a reply into (front matter dict, body); ({}, original text) if no front matter."""
+    # Tolerate CRLF line endings and a reply wrapped whole in a ```markdown fence.
+    text = translated_markdown.replace("\r\n", "\n").strip()
+    text = re.sub(r"\A```[\w-]*\n(---\n[\s\S]*?)\n?```\Z", r"\1", text)
+    m = re.match(r"^---\n([\s\S]+?)\n---\n?([\s\S]*)$", text)
+    if not m:
+        return {}, translated_markdown
+    try:
+        fm = yaml.safe_load(m.group(1))
+    except Exception:  # malformed YAML from the model: keep the body, drop the metadata
+        fm = {}
+    return (fm if isinstance(fm, dict) else {}), m.group(2)
+
+
+def ensure_posts_en_dir() -> None:
+    POSTS_EN_DIR.mkdir(parents=True, exist_ok=True)  # exist_ok: an existing directory is not an error
+
+
+def main() -> int:
+    """Translate Korean posts into _posts_en and return the exit status (always 0).
+
+    Paths given via --only or the ONLY_FILES environment variable (one per line) are
+    always re-translated; with neither, only posts for which needs_translation() is true.
+    """
+    parser = argparse.ArgumentParser(description="Translate Korean _posts to English _posts_en")
+    parser.add_argument("--only", nargs="*", help="Only translate these paths under _posts/")
+    args = parser.parse_args()
+
+    ensure_posts_en_dir()
+    # An exported-but-empty TRANSLATION_MODEL (e.g. an unset CI variable) must not
+    # override the default, so fall back on any falsy value rather than only on absence.
+    model = os.environ.get("TRANSLATION_MODEL") or "gpt-5.1-mini"
+    only_files: list[str] = list(args.only or [])
+    env_only = os.environ.get("ONLY_FILES", "")
+    only_files.extend(line.strip() for line in env_only.splitlines() if line.strip())
+
+    only_paths = [Path(p) for p in only_files] if only_files else None
+
+    created = 0
+    force = only_paths is not None
+    for src in load_korean_posts(only_paths):
+        if not force and not needs_translation(src):
+            continue
+        post = frontmatter.load(src)
+        prompt = build_prompt(post)
+        translated = call_openai(model, PROMPT_SYSTEM, prompt)
+        translated_fm, body = split_front_matter(translated)
+
+        # Overlay the translated fields on the source metadata so keys the model
+        # dropped (layout, tags, ...) survive; the source title backs up an empty one.
+        fm = {**post.metadata, **translated_fm}
+        if not fm.get("title"):
+            fm["title"] = post.metadata.get("title", "")
+
+        # Construct english filename from translated title (source stem if untitled)
+        en_path = to_en_filename(src, str(fm["title"] or src.stem))
+        # Set metadata directly: Post(body, **fm) fails on non-string or reserved keys.
+        en_post = frontmatter.Post(body)
+        en_post.metadata = fm
+        with open(en_path, "w", encoding="utf-8") as f:
+            f.write(frontmatter.dumps(en_post))
+        print(f"[translate] Created: {en_path.relative_to(REPO_ROOT)}")
+        created += 1
+
+    print(f"[translate] New English posts: {created}")
+    return 0
+
+
+if __name__ == "__main__":
+    raise SystemExit(main())  # main()'s return value becomes the process exit status