diff --git a/.github/workflows/ansible-deploy-bonus.yml b/.github/workflows/ansible-deploy-bonus.yml new file mode 100644 index 0000000000..93b7230230 --- /dev/null +++ b/.github/workflows/ansible-deploy-bonus.yml @@ -0,0 +1,83 @@ +--- +name: Ansible Deploy Bonus (Go App) + +on: + push: + branches: [main, master, lab06] + paths: + - 'ansible/vars/app_bonus.yml' + - 'ansible/playbooks/deploy_bonus.yml' + - 'ansible/playbooks/deploy_all.yml' + - 'ansible/roles/web_app/**' + - '.github/workflows/ansible-deploy-bonus.yml' + pull_request: + branches: [main, master, lab06] + paths: + - 'ansible/vars/app_bonus.yml' + - 'ansible/playbooks/deploy_bonus.yml' + - 'ansible/playbooks/deploy_all.yml' + - 'ansible/roles/web_app/**' + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + pip install ansible ansible-lint + + - name: Run ansible-lint + run: | + cd ansible + ansible-lint playbooks/*.yml + + deploy: + name: Deploy Bonus (Go) Application + needs: lint + if: github.event_name == 'push' + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Ansible and collections + run: | + pip install ansible + cd ansible + ansible-galaxy install -r requirements.yml + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts + + - name: Deploy Bonus Application + run: | + cd ansible + echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass + ansible-playbook playbooks/deploy_bonus.yml \ + -i inventory/hosts.ini \ + --vault-password-file /tmp/vault_pass + rm /tmp/vault_pass + + - name: Verify Bonus App 
Deployment + run: | + sleep 10 + curl -f http://${{ secrets.VM_HOST }}:8001 || exit 1 + curl -f http://${{ secrets.VM_HOST }}:8001/health || exit 1 diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..0b706c2f32 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,88 @@ +--- +name: Ansible Deployment + +on: + push: + branches: [main, master, lab06] + paths: + - 'ansible/vars/app_python.yml' + - 'ansible/playbooks/deploy.yml' + - 'ansible/playbooks/deploy_python.yml' + - 'ansible/playbooks/deploy_all.yml' + - 'ansible/roles/web_app/**' + - 'ansible/roles/common/**' + - 'ansible/roles/docker/**' + - 'ansible/group_vars/**' + - 'ansible/inventory/**' + - 'ansible/playbooks/provision.yml' + - '!ansible/docs/**' + - '.github/workflows/ansible-deploy.yml' + pull_request: + branches: [main, master, lab06] + paths: + - 'ansible/**' + - '!ansible/docs/**' + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install dependencies + run: | + pip install ansible ansible-lint + + - name: Run ansible-lint + run: | + cd ansible + ansible-lint playbooks/*.yml + + deploy: + name: Deploy Application + needs: lint + if: github.event_name == 'push' + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.12' + + - name: Install Ansible and collections + run: | + pip install ansible + cd ansible + ansible-galaxy install -r requirements.yml + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_rsa + chmod 600 ~/.ssh/id_rsa + ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts + + - name: Deploy Python Application + run: | + cd ansible + echo "${{ 
secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass + ansible-playbook playbooks/deploy_python.yml \ + -i inventory/hosts.ini \ + --vault-password-file /tmp/vault_pass + rm /tmp/vault_pass + + - name: Verify Deployment + run: | + sleep 10 + curl -f http://${{ secrets.VM_HOST }}:8000 || exit 1 + curl -f http://${{ secrets.VM_HOST }}:8000/health || exit 1 diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 0000000000..d39788f294 --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,134 @@ +name: Go CI/CD Pipeline + +# Cancel in-progress runs when a new run is triggered +concurrency: + group: go-ci-${{ github.ref }} + cancel-in-progress: true + +# Path-based triggers: only run when app_go files change +on: + push: + branches: + - main + - master + - lab03 + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' + pull_request: + branches: + - main + - master + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' + +env: + GO_VERSION: '1.21' + DOCKER_IMAGE: mirana18/devops-info-service-go + +jobs: + test: + name: Code Quality & Testing + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Go ${{ env.GO_VERSION }} + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: app_go/go.mod + cache: true + + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v6 + with: + version: latest + working-directory: app_go + args: --timeout=5m + + - name: Run tests + working-directory: ./app_go + run: go test -v -race -coverprofile=coverage.out -covermode=atomic ./... 
+ + - name: Generate coverage report + working-directory: ./app_go + run: | + go tool cover -func=coverage.out + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: app_go/coverage.out + flags: go + name: go-coverage + fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} + + - name: Upload coverage report + uses: actions/upload-artifact@v4 + with: + name: go-coverage-report + path: app_go/coverage.out + retention-days: 7 + + docker: + name: Build & Push Docker Image + runs-on: ubuntu-latest + needs: test + + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/lab03') + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + - name: Generate version tags (CalVer) + id: meta + run: | + VERSION=$(date +%Y.%m) + BUILD_NUMBER=${{ github.run_number }} + FULL_VERSION="${VERSION}.${BUILD_NUMBER}" + SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-7) + echo "version=${VERSION}" >> $GITHUB_OUTPUT + echo "full_version=${FULL_VERSION}" >> $GITHUB_OUTPUT + echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT + + - name: Extract Docker metadata + id: docker_meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_IMAGE }} + tags: | + type=raw,value=${{ steps.meta.outputs.full_version }} + type=raw,value=${{ steps.meta.outputs.version }} + type=raw,value=latest + type=raw,value=sha-${{ steps.meta.outputs.short_sha }} + labels: | + org.opencontainers.image.title=DevOps Info Service (Go) + org.opencontainers.image.description=Go-based system information service + org.opencontainers.image.version=${{ steps.meta.outputs.full_version }} + org.opencontainers.image.revision=${{ github.sha }} + + - name: Build and push 
Docker image + uses: docker/build-push-action@v6 + with: + context: ./app_go + file: ./app_go/Dockerfile + push: true + tags: ${{ steps.docker_meta.outputs.tags }} + labels: ${{ steps.docker_meta.outputs.labels }} + cache-from: type=registry,ref=${{ env.DOCKER_IMAGE }}:latest + cache-to: type=inline diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..bfac3722aa --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,179 @@ +name: Python CI/CD Pipeline + +# Cancel in-progress runs when a new run is triggered +concurrency: + group: python-ci-${{ github.ref }} + cancel-in-progress: true + +# Workflow triggers +on: + push: + branches: + - main + - master + - lab03 + paths: + - 'app_python/**' + - '.github/workflows/python-ci.yml' + pull_request: + branches: + - main + - master + paths: + - 'app_python/**' + - '.github/workflows/python-ci.yml' + +# Environment variables used across jobs +env: + PYTHON_VERSION: '3.11' + DOCKER_IMAGE: mirana18/devops-info-service + +jobs: + # Job 1: Code Quality & Testing + test: + name: Code Quality & Testing + runs-on: ubuntu-latest + + steps: + # Step 1: Check out the repository code + - name: Checkout code + uses: actions/checkout@v4 + + # Step 2: Set up Python environment + - name: Set up Python ${{ env.PYTHON_VERSION }} + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' # Cache pip dependencies for faster runs + cache-dependency-path: | + app_python/requirements.txt + app_python/requirements-dev.txt + + # Step 3: Install dependencies + - name: Install dependencies + working-directory: ./app_python + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + # Install linter + pip install flake8 + + # Step 4: Run linter (flake8) + - name: Lint with flake8 + working-directory: ./app_python + run: | + # Stop the build if there are Python syntax errors or 
undefined names + flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics + # Exit-zero treats all errors as warnings. Line length set to 100 + flake8 . --count --exit-zero --max-complexity=10 --max-line-length=100 --statistics + + # Step 5: Run unit tests with pytest and coverage + - name: Run tests with pytest and coverage + working-directory: ./app_python + run: | + pytest -v --tb=short --cov=. --cov-report=term --cov-report=xml --cov-fail-under=70 + + # Step 6: Upload coverage to Codecov + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + file: app_python/coverage.xml + flags: python + name: python-coverage + fail_ci_if_error: false + token: ${{ secrets.CODECOV_TOKEN }} + + # Step 7: Upload coverage artifact (for review) + - name: Upload coverage report + uses: actions/upload-artifact@v4 + with: + name: python-coverage-report + path: app_python/coverage.xml + retention-days: 7 + + # Job 2: Docker Build & Push (only runs if tests pass) + docker: + name: Build & Push Docker Image + runs-on: ubuntu-latest + needs: test # This job only runs if 'test' job succeeds + + # Only push to Docker Hub on push events to main, master, or lab03 (never on PRs) + if: github.event_name == 'push' && (github.ref == 'refs/heads/main' || github.ref == 'refs/heads/master' || github.ref == 'refs/heads/lab03') + + steps: + # Step 1: Check out code + - name: Checkout code + uses: actions/checkout@v4 + + # Step 2: Set up Docker Buildx (for advanced build features) + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + # Step 3: Log in to Docker Hub + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKER_USERNAME }} + password: ${{ secrets.DOCKER_PASSWORD }} + + # Step 4: Generate version tags using Calendar Versioning (CalVer) + - name: Generate version tags + id: meta + run: | + # CalVer format: YYYY.MM (e.g., 2026.02) + VERSION=$(date +%Y.%m) + + # Build number (GitHub run number) + BUILD_NUMBER=${{ 
github.run_number }} + + # Full version with build: YYYY.MM.BUILD (e.g., 2026.02.15) + FULL_VERSION="${VERSION}.${BUILD_NUMBER}" + + # Short commit SHA for traceability + SHORT_SHA=$(echo ${{ github.sha }} | cut -c1-7) + + echo "version=${VERSION}" >> $GITHUB_OUTPUT + echo "full_version=${FULL_VERSION}" >> $GITHUB_OUTPUT + echo "short_sha=${SHORT_SHA}" >> $GITHUB_OUTPUT + + echo "Generated version: ${FULL_VERSION}" + echo "Commit SHA: ${SHORT_SHA}" + + # Step 5: Extract Docker metadata for tags and labels + - name: Extract Docker metadata + id: docker_meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_IMAGE }} + tags: | + # CalVer version with build number (e.g., 2026.02.15) + type=raw,value=${{ steps.meta.outputs.full_version }} + # CalVer version without build (e.g., 2026.02) + type=raw,value=${{ steps.meta.outputs.version }} + # Latest tag + type=raw,value=latest + # Commit SHA (for traceability) + type=raw,value=sha-${{ steps.meta.outputs.short_sha }} + labels: | + org.opencontainers.image.title=DevOps Info Service + org.opencontainers.image.description=Flask-based system information service + org.opencontainers.image.version=${{ steps.meta.outputs.full_version }} + org.opencontainers.image.revision=${{ github.sha }} + + # Step 6: Build and push Docker image + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: ./app_python + file: ./app_python/Dockerfile + push: true + tags: ${{ steps.docker_meta.outputs.tags }} + labels: ${{ steps.docker_meta.outputs.labels }} + cache-from: type=registry,ref=${{ env.DOCKER_IMAGE }}:latest + cache-to: type=inline + build-args: | + BUILD_DATE=${{ github.event.head_commit.timestamp }} + VCS_REF=${{ github.sha }} + VERSION=${{ steps.meta.outputs.full_version }} + diff --git a/.gitignore b/.gitignore index 30d74d2584..b215a9cf93 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,10 @@ -test \ No newline at end of file +test +venv/ + +*.retry +.vault_pass +.vault_password 
+vault_pass* +ansible/inventory/*.pyc + +monitoring/.env \ No newline at end of file diff --git a/ansible/.ansible-lint b/ansible/.ansible-lint new file mode 100644 index 0000000000..714428c8d4 --- /dev/null +++ b/ansible/.ansible-lint @@ -0,0 +1,3 @@ +# Skip syntax-check when vars_files use vault (fails without password) +skip_list: + - internal-error diff --git a/ansible/README.md b/ansible/README.md new file mode 100644 index 0000000000..240d317480 --- /dev/null +++ b/ansible/README.md @@ -0,0 +1,120 @@ +# Ansible — Lab 5 & Lab 6 + +[![Ansible Deployment](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/ansible-deploy.yml/badge.svg)](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/ansible-deploy.yml) + +## Quick start + +Run the commands below from the **`ansible/`** directory. + +1. **Set your VM IP** + Edit `inventory/hosts.ini`: replace the IP with your VM's public IP. + +2. **Install Ansible collections:** + ```bash + ansible-galaxy install -r requirements.yml + ``` + +3. **Create or edit encrypted variables** (Docker Hub credentials and app config): + - If `group_vars/all.yml` **does not exist**: + `ansible-vault create group_vars/all.yml` + Paste content from `group_vars/all.yml.example`, save, remember the vault password. + - If `group_vars/all.yml` **already exists**: + `ansible-vault edit group_vars/all.yml` + +4. **Test connectivity:** + ```bash + ansible all -m ping --ask-vault-pass + ``` + +5. **Provision** (install common packages + Docker): + ```bash + ansible-playbook playbooks/provision.yml --ask-vault-pass + ``` + +6. **Deploy application** (Docker Compose): + ```bash + ansible-playbook playbooks/deploy.yml --ask-vault-pass + # Or: deploy_python.yml (Python only), deploy_bonus.yml (Go only), deploy_all.yml (both) + ``` + +7. **Verify:** + ```bash + ansible webservers -a "docker ps" --ask-vault-pass + curl http://<VM_IP>:8000/health + ``` + +8. 
**Deploy monitoring stack (Lab 7 bonus)** — Loki, Promtail, Grafana + apps: + ```bash + ansible-playbook playbooks/deploy-monitoring.yml --ask-vault-pass + ``` + Then open `http://<VM_IP>:3000` (Grafana) and add a Loki data source with URL `http://loki:3100`. + +## Tag-based execution + +```bash +# Run only docker tasks +ansible-playbook playbooks/provision.yml --tags "docker" --ask-vault-pass + +# Skip common role +ansible-playbook playbooks/provision.yml --skip-tags "common" --ask-vault-pass + +# Deploy only +ansible-playbook playbooks/deploy.yml --tags "app_deploy" --ask-vault-pass + +# Wipe and redeploy +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --ask-vault-pass + +# Wipe only (no redeploy) +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe --ask-vault-pass + +# List all tags +ansible-playbook playbooks/deploy.yml --list-tags +``` + +## Structure + +``` +ansible/ +├── ansible.cfg +├── requirements.yml +├── inventory/ +│ ├── hosts.ini static (default) +│ └── yandex_cloud.yml dynamic (Lab 5 bonus) +├── group_vars/ +│ ├── all.yml (encrypted) +│ └── all.yml.example +├── vars/ +│ ├── app_python.yml Lab 6 bonus: Python app vars +│ └── app_bonus.yml Lab 6 bonus: Go app vars +├── playbooks/ +│ ├── provision.yml common + docker +│ ├── deploy.yml web_app (default/single app) +│ ├── deploy_python.yml Python app only +│ ├── deploy_bonus.yml Go app only +│ ├── deploy_all.yml both apps +│ └── deploy-monitoring.yml Loki stack (Lab 7 bonus) +├── roles/ +│ ├── common/ base system (apt, packages, timezone) +│ ├── docker/ Docker CE install and service +│ ├── web_app/ Docker Compose deployment + wipe logic +│ └── monitoring/ Loki/Promtail/Grafana stack (Lab 7 bonus) +│ ├── defaults/main.yml +│ ├── meta/main.yml (depends on docker role) +│ ├── tasks/ +│ │ ├── main.yml +│ │ ├── setup.yml (dirs + template configs) +│ │ └── deploy.yml (docker_compose_v2 + wait for Loki/Grafana) +│ └── templates/ +│ ├── docker-compose.yml.j2 +│ ├── loki-config.yml.j2 +│ └── 
promtail-config.yml.j2 +└── docs/ + ├── LAB05.md + └── LAB06.md +``` + +Documentation: `docs/LAB05.md`, `docs/LAB05_BONUS.md` (dynamic inventory), `docs/LAB06.md` + +### Troubleshooting + +If "Failed to update apt cache" on the VM — the VM has no outbound internet. Check security group egress rules. See `docs/LAB05.md` for details. diff --git a/ansible/ansible.cfg b/ansible/ansible.cfg new file mode 100644 index 0000000000..0ddcbf1672 --- /dev/null +++ b/ansible/ansible.cfg @@ -0,0 +1,11 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False + +[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/ansible/docs/LAB05.md b/ansible/docs/LAB05.md new file mode 100644 index 0000000000..0471daf8a3 --- /dev/null +++ b/ansible/docs/LAB05.md @@ -0,0 +1,81 @@ +# Lab 5 — Ansible Fundamentals (Documentation) + +## 1. Architecture Overview + +- **Ansible version:** 2.16+ (run `ansible --version` to confirm). +- **Target VM OS and version:** Ubuntu 22.04 LTS (VM from Lab 4, Pulumi + Yandex Cloud). +- **Role structure:** + - **common** — base system setup: force IPv4 for apt, optional Yandex mirror, apt cache update, install packages (curl, git, vim, htop, etc.), timezone. + - **docker** — install Docker CE from official repository, refresh cache after adding repo, install packages (docker-ce, docker-ce-cli, containerd.io), docker service, add user to docker group, python3-docker. + - **app_deploy** — verify Vault variables, Docker Hub login, pull image, stop/remove old container, run new one with port 5001, wait for port, check /health. +- **Why roles instead of monolithic playbooks?** Roles enable code reuse, separate testability, and short playbooks; logic is split by concern (common / docker / app), and one role can be used across multiple playbooks and projects. + +## 2. 
Roles Documentation + +### common +- **Purpose:** Base system setup: force IPv4 for apt (avoid IPv6 "Network is unreachable"), optional Yandex mirror for Ubuntu, apt cache update, install packages (python3-pip, curl, git, vim, htop, unzip, ca-certificates, gnupg, lsb-release), set timezone (Europe/Moscow). +- **Variables:** `use_yandex_mirror` (default: true), `common_packages` (list), `timezone` (default: Europe/Moscow). In `defaults/main.yml`. +- **Handlers:** None. +- **Dependencies:** None. + +### docker +- **Purpose:** Install Docker CE: dependencies (ca-certificates, curl, gnupg), GPG key and Docker repository, apt cache update, install docker-ce, docker-ce-cli, containerd.io, start and enable docker service, add user (ansible_user) to docker group, install python3-docker for Ansible modules. +- **Variables:** In `defaults/main.yml`: `docker_install_compose`, `docker_users`. Tasks use architecture mapping (x86_64→amd64, aarch64→arm64) for repository URL. +- **Handlers:** `restart docker` — restart docker service when repository or packages change. +- **Dependencies:** None (common role typically runs first to update apt). + +### app_deploy +- **Purpose:** Deploy application in Docker: verify dockerhub_username and dockerhub_password, Docker Hub login (no_log), pull image, stop and remove old container by name, run new container with port mapping (app_port:app_container_port, default 5001:5001), restart policy unless-stopped, wait for port, GET /health check. +- **Variables:** From group_vars (Vault): `dockerhub_username`, `dockerhub_password`, `app_name`, `docker_image`, `docker_image_tag`, `app_port`, `app_container_name`. In role defaults: `app_port`, `app_container_port` (5001), `app_restart_policy`, `app_env`. +- **Handlers:** `restart app container` (optional, conditional). +- **Dependencies:** Requires docker role (Docker on host) and encrypted group_vars/all.yml with credentials. + +## 3. 
Idempotency Demonstration + +- **First run:** On first run of `ansible-playbook playbooks/provision.yml --ask-vault-pass`, tasks show **changed**: apt cache update, package installs (common, Docker dependencies, Docker repo, Docker packages, python3-docker), mirror setup/force IPv4 when use_yandex_mirror, docker service start, user added to docker group, timezone set. +- **Second run:** On second run the same tasks show **ok** — state already matches desired, no (or minimal) changes. +- **Analysis:** First run brings packages, repos, service, and user to desired state; second run shows modules (apt, service, user, template/copy) see target state is met and do not change the system. +- **Explanation:** Idempotency comes from declarative modules with explicit state: `apt: state=present`, `service: state=started`, `user: groups: docker`, `template`/`copy` with fixed content. Ansible applies changes only when current and desired state differ. + +## 4. Ansible Vault Usage + +- **Storage:** Docker Hub credentials and app variables are stored in `group_vars/all.yml`, encrypted with `ansible-vault create` (or `ansible-vault encrypt`). The file can be committed; without the Vault password the content is unreadable. +- **Vault password management:** Use `--ask-vault-pass` when running playbooks and ad-hoc commands; alternative: password file (e.g. `.vault_pass`), `chmod 600`, and `--vault-password-file` or `vault_password_file` in ansible.cfg. Password file is in `.gitignore`. +- **Example encrypted file:** `head -5 group_vars/all.yml` shows lines like `$ANSIBLE_VAULT;1.1;AES256` or `$ANSIBLE_VAULT;1.2;AES256` — file is encrypted. To verify decryption: `ansible-vault view group_vars/all.yml --ask-vault-pass`. +- **Why Ansible Vault is important:** Keeps secrets (Docker Hub login/password) in the repo in encrypted form; decryption only with the Vault password, reducing leakage risk when collaborating and backing up. + +## 5. 
Deployment Verification + +- **Deploy run output:** Output of `ansible-playbook playbooks/deploy.yml --ask-vault-pass`: tasks Ensure Docker Hub credentials, Log in to Docker Hub, Pull Docker image, Stop existing container (if any), Remove old container, Run application container, Wait for application port, Check health endpoint — all succeed (ok or changed as needed). +- **Container status:** Example output of `ansible webservers -a "docker ps" --ask-vault-pass`: + ```text + web1 | CHANGED | rc=0 >> + CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES + <container_id> <dockerhub_username>/devops-info-service:latest "python app.py" ... Up ... 0.0.0.0:5001->5001/tcp devops-app + ``` +- **Health check verification:** From local machine: + ```bash + curl http://89.169.129.155:5001/health + ``` + Example response: + ```json + {"status":"healthy","timestamp":"2026-02-25T10:07:38.381157.000Z","uptime_seconds":91485.11} + ``` + Main page: `curl http://89.169.129.155:5001/` — returns service info. +- **Handlers:** For deploy, the "restart app container" handler is not needed in the typical flow (container is recreated by Run application container). The "restart docker" handler in the docker role runs when Docker repo or packages change during provisioning. + +## 6. Key Decisions + +- **Why roles instead of plain playbooks?** Roles group related tasks, defaults, and handlers by concern (common / docker / app); playbooks stay short and readable; the same roles can be used in different playbooks and projects. +- **How do roles improve reusability?** One role can be included in multiple playbooks and optionally published to Ansible Galaxy; a change in the role applies everywhere it is used. +- **What makes a task idempotent?** Using modules that describe desired state (e.g. `state: present`, `state: started`) instead of one-off commands; Ansible only applies changes when current and target state differ. 
+- **How do handlers improve efficiency?** Handlers run once at the end of the playbook even with multiple notifies (e.g. one Docker restart after several config or package changes). +- **Why is Ansible Vault necessary?** To store secrets in the repo encrypted and avoid keeping passwords and tokens in plain text in code and commit history. + +## 7. Challenges + +- **"Failed to update apt cache" on VM:** The VM had no outbound internet. In Pulumi the security group had only ingress rules; an egress rule was added (protocol ANY, 0.0.0.0/0). The common role also uses Yandex mirror and forces IPv4 for apt to reduce dependence on IPv6 and external mirrors. +- **docker-ce package not found:** The Docker repo URL used architecture from ansible_architecture (x86_64/aarch64) while Docker expects amd64/arm64. Mapping was added in the "Add Docker repository" task. After adding the repo, explicit apt cache update (cache_valid_time: 0) was added so packages from the new repo are visible. +- **Variables from group_vars not loaded:** In deploy.yml playbook, explicit `vars_files: ../group_vars/all.yml` was added so variables from the encrypted file are used on deploy regardless of current directory and load order. +- **"Cannot create container when image is not specified" in Stop existing container:** The docker_container module with state: stopped requires the image parameter. The image parameter was added to the "Stop existing container" task. +- **Accessing the app from outside (curl on port 5001):** In Pulumi the security group only had port 5000 open; the app listens on 5001. An ingress rule for TCP 5001 (app-5001-rule) was added in `pulumi/__main__.py` and applied with `pulumi up`. 
diff --git a/ansible/docs/LAB06.md b/ansible/docs/LAB06.md new file mode 100644 index 0000000000..e27dd73d64 --- /dev/null +++ b/ansible/docs/LAB06.md @@ -0,0 +1,859 @@ +# Lab 6: Advanced Ansible & CI/CD - Submission + +**Name:** Arina Zimina +**Date:** 2026-03-04 +**Lab Points:** 10 + bonus + +--- + +## Task 1: Blocks & Tags (2 pts) + +### Implementation + +All three roles (`common`, `docker`, `web_app`) were refactored to use Ansible blocks with `rescue`/`always` sections and a comprehensive tag strategy. + +#### `common` role + +- **`packages` block**: groups apt mirror configuration, cache update, and package installation. `rescue` retries Apt cache update (apt module) if the block fails, then retries package install. `always` logs completion to `/tmp/ansible_common_packages.log`. +- **`users` block**: groups timezone setup. `always` logs completion. +- Tags: `packages`, `users`, `common`. + +#### `docker` role + +- **`docker_install` block**: groups Docker GPG key, repo, cache update, and package installation. `rescue` waits 10 seconds and retries (handles transient network failures). `always` ensures Docker service is enabled and started. +- **`docker_config` block**: groups user group membership and python3-docker installation. +- Tags: `docker_install`, `docker_config`, `docker`. + +#### `web_app` role + +- **Deploy block** (`app_deploy`, `compose`): groups Docker Hub login, directory creation, template rendering, compose up, health check. `rescue` logs the error and fails with diagnostic info. +- **Wipe block** (`web_app_wipe`): gated by `when: web_app_wipe | bool` variable. 
+ +### Tag strategy + +| Tag | Scope | +|-----|-------| +| `packages` | common role: package installation only | +| `users` | common role: user/system configuration | +| `common` | entire common role | +| `docker_install` | docker role: installation only | +| `docker_config` | docker role: configuration only | +| `docker` | entire docker role | +| `app_deploy` | web_app role: deployment tasks | +| `compose` | web_app role: docker compose tasks | +| `web_app_wipe` | web_app role: wipe/cleanup tasks | + +### Evidence + +```bash +# Selective execution with tags +ansible-playbook playbooks/provision.yml --tags "docker" --ask-vault-pass + +# Skip common role +ansible-playbook playbooks/provision.yml --skip-tags "common" --ask-vault-pass + +# List all tags +ansible-playbook playbooks/deploy.yml --list-tags --ask-vault-pass +``` +```bash +Vault password: + +PLAY [Provision web servers] ***************************************************************************************************************************************** + +TASK [Gathering Facts] *********************************************************************************************************************************************** +ok: [web1] + +TASK [docker : Install dependencies for Docker] ********************************************************************************************************************** +ok: [web1] + +TASK [docker : Add Docker GPG key] *********************************************************************************************************************************** +ok: [web1] + +TASK [docker : Add Docker repository] ******************************************************************************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. 
This feature will be removed from ansible-core version 2.24. +Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. + +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************************************************************************ +changed: [web1] + +TASK [docker : Install Docker packages] ****************************************************************************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] ********************************************************************************************************* +ok: [web1] + +TASK [docker : Add remote user to docker group] ********************************************************************************************************************** +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] **************************************************************************************************** +ok: [web1] + +PLAY RECAP *********************************************************************************************************************************************************** +web1 : ok=9 changed=1 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + +zsh: command not found: # +Vault password: + +PLAY [Provision web servers] ***************************************************************************************************************************************** + +TASK [Gathering Facts] *********************************************************************************************************************************************** +ok: [web1] + +TASK [docker : Install 
dependencies for Docker] ********************************************************************************************************************** +ok: [web1] + +TASK [docker : Add Docker GPG key] *********************************************************************************************************************************** +ok: [web1] + +TASK [docker : Add Docker repository] ******************************************************************************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. +Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. 
+ +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************************************************************************ +changed: [web1] + +TASK [docker : Install Docker packages] ****************************************************************************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] ********************************************************************************************************* +ok: [web1] + +TASK [docker : Add remote user to docker group] ********************************************************************************************************************** +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] **************************************************************************************************** +ok: [web1] + +PLAY RECAP *********************************************************************************************************************************************************** +web1 : ok=9 changed=1 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 + +zsh: command not found: # +Vault password: + +playbook: playbooks/deploy.yml + + play #1 (webservers): Deploy application TAGS: [] + TASK TAGS: [app_deploy, compose, docker, docker_config, docker_install, web_app_wipe] +``` +### Research Questions + +**Q: What happens if rescue block also fails?** +A: The play fails entirely. Ansible reports the original error from the block AND the rescue error. The `always` section still executes regardless — it runs whether block succeeds, block fails, or rescue fails. + +**Q: Can you have nested blocks?** +A: Yes, blocks can be nested. An inner block can have its own `rescue`/`always`. However, deep nesting hurts readability — prefer flat structure with separate blocks for each logical group. 
+ +**Q: How are tags inherited by tasks within blocks?** +A: Tags applied at the block level automatically propagate to every task inside that block (including the tasks in its `rescue` and `always` sections). Tasks can also have their own additional tags. When a playbook is run with `--tags`, a task runs if any of its inherited or direct tags matches one of the requested tags. + +--- + +## Task 2: Docker Compose (3 pts) + +### Migration from `docker run` to Docker Compose + +The `app_deploy` role was renamed to `web_app` and completely rewritten to use Docker Compose via a Jinja2 template. + +#### Before (Lab 5 — `app_deploy`) + +- Used `community.docker.docker_container` module +- Stopped, removed, and re-created container on each run +- No declarative configuration file + +#### After (Lab 6 — `web_app`) + +- Uses a templated `docker-compose.yml.j2` +- Deploys with `docker compose up -d --force-recreate` +- Declarative — desired state is in a version-controlled template +- Easy to add services, volumes, networks in the future + +### Template structure + +```yaml +version: '3.8' +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_tag }} + container_name: {{ app_name }} + ports: + - "{{ app_port }}:{{ app_internal_port }}" + restart: {{ app_restart_policy }} +``` + +### Role dependencies + +`roles/web_app/meta/main.yml` declares `docker` as a dependency, so running `deploy.yml` automatically provisions Docker first.
+ +### Variables + +| Variable | Default | Description | +|----------|---------|-------------| +| `app_name` | `devops-app` | Container and service name | +| `docker_image` | from vault | Docker Hub image | +| `docker_tag` | `latest` | Image version | +| `app_port` | `8000` | Host port | +| `app_internal_port` | `5001` | Container port (Python app listens on 5001) | +| `compose_project_dir` | `/opt/{{ app_name }}` | Directory for compose file | +| `web_app_wipe` | `false` | Wipe control flag | + +### Evidence + +```bash +# Full deployment +ansible-playbook playbooks/deploy.yml --ask-vault-pass + +# Idempotency (second run) +ansible-playbook playbooks/deploy.yml --ask-vault-pass + +# Verify +ssh ubuntu@ "docker ps" +curl http://:8000/health +``` + +```bash +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] + +TASK [docker : Install dependencies for Docker] ************************************************************ +ok: [web1] + +TASK [docker : Add Docker GPG key] ************************************************************************* +ok: [web1] + +TASK [docker : Add Docker repository] ********************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. 
+Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. + +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************** +changed: [web1] + +TASK [docker : Install Docker packages] ******************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] *********************************************** +ok: [web1] + +TASK [docker : Add remote user to docker group] ************************************************************ +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] ****************************************** +ok: [web1] + +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] ********************************************* +skipping: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** +skipping: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +skipping: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +skipping: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +skipping: [web1] + +TASK [web_app : Ensure Docker Hub credentials are set] 
***************************************************** +ok: [web1] => { + "changed": false, + "msg": "All assertions passed" +} + +TASK [web_app : Log in to Docker Hub] ********************************************************************** +ok: [web1] + +TASK [web_app : Create application directory] ************************************************************** +ok: [web1] + +TASK [web_app : Template docker-compose file] ************************************************************** +ok: [web1] + +TASK [web_app : Remove existing container with same name] ************************************************** +changed: [web1] + +TASK [web_app : Pull Docker image] ************************************************************************* +ok: [web1] + +TASK [web_app : Deploy with docker compose] **************************************************************** +changed: [web1] + +TASK [web_app : Wait for application port] ***************************************************************** +ok: [web1] + +TASK [web_app : Pause for application to start listening] ************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [web1] + +TASK [web_app : Check health endpoint] ********************************************************************* +ok: [web1] + +PLAY RECAP ************************************************************************************************* +web1 : ok=20 changed=3 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 + +zsh: missing end of string +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] + +TASK [docker : Install dependencies for Docker] ************************************************************ +ok: [web1] + +TASK [docker : Add Docker GPG key] 
************************************************************************* +ok: [web1] + +TASK [docker : Add Docker repository] ********************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. +Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. + +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************** +changed: [web1] + +TASK [docker : Install Docker packages] ******************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] *********************************************** +ok: [web1] + +TASK [docker : Add remote user to docker group] ************************************************************ +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] ****************************************** +ok: [web1] + +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] ********************************************* +skipping: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** 
+skipping: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +skipping: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +skipping: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +skipping: [web1] + +TASK [web_app : Ensure Docker Hub credentials are set] ***************************************************** +ok: [web1] => { + "changed": false, + "msg": "All assertions passed" +} + +TASK [web_app : Log in to Docker Hub] ********************************************************************** +ok: [web1] + +TASK [web_app : Create application directory] ************************************************************** +ok: [web1] + +TASK [web_app : Template docker-compose file] ************************************************************** +ok: [web1] + +TASK [web_app : Remove existing container with same name] ************************************************** +changed: [web1] + +TASK [web_app : Pull Docker image] ************************************************************************* +ok: [web1] + +TASK [web_app : Deploy with docker compose] **************************************************************** +changed: [web1] + +TASK [web_app : Wait for application port] ***************************************************************** +ok: [web1] + +TASK [web_app : Pause for application to start listening] ************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [web1] + +TASK [web_app : Check health endpoint] ********************************************************************* +ok: [web1] + +PLAY RECAP ************************************************************************************************* +web1 : ok=20 changed=3 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 + + 
+CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +d3019061b43c mirana18/devops-info-service:latest "python app.py" 20 minutes ago Up 20 minutes (healthy) 0.0.0.0:8000->5001/tcp, [::]:8000->5001/tcp devops-app +{"status":"healthy","timestamp":"2026-03-04T19:32:33.090645.000Z","uptime_seconds":1224.35} +``` +### Research Questions + +**Q: What's the difference between `restart: always` and `restart: unless-stopped`?** +A: `always` restarts the container on any exit and on Docker daemon startup. `unless-stopped` does the same but does NOT restart containers that were manually stopped (with `docker stop`) when the daemon restarts. `unless-stopped` is preferred for deployments — it respects intentional stops. + +**Q: How do Docker Compose networks differ from Docker bridge networks?** +A: Docker Compose automatically creates a dedicated bridge network per project. Containers within the same compose project can reach each other by service name (built-in DNS). Manual `docker run` uses the default bridge where containers communicate only by IP unless you create and attach a custom network. + +**Q: Can you reference Ansible Vault variables in the template?** +A: Yes. Vault-encrypted variables are decrypted in memory during playbook execution. Jinja2 templates render with the decrypted values, so `{{ dockerhub_password }}` in a template would contain the plaintext. Be careful not to expose secrets in files on disk. + +--- + +## Task 3: Wipe Logic (1 pt) + +### Implementation + +Wipe logic uses a **double-gating** mechanism: + +1. **Variable gate**: `web_app_wipe: false` in `defaults/main.yml` — `when: web_app_wipe | bool` condition +2. **Tag gate**: `tags: [web_app_wipe]` — tasks run only when this tag is selected or when no tag filter is applied (all tags run) + +### Wipe tasks (`roles/web_app/tasks/wipe.yml`) + +1. `docker compose down --remove-orphans` (stop and remove containers) +2. Remove `docker-compose.yml` file +3. Remove application directory (`/opt/{{ app_name }}`) +4. Optionally remove Docker image +5.
Log success message + +### Test Scenarios + +**Scenario 1: Normal deployment (wipe does NOT run)** +```bash +ansible-playbook playbooks/deploy.yml --ask-vault-pass +# Wipe tasks are skipped because web_app_wipe=false +``` + +```bash +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] + +TASK [docker : Install dependencies for Docker] ************************************************************ +ok: [web1] + +TASK [docker : Add Docker GPG key] ************************************************************************* +ok: [web1] + +TASK [docker : Add Docker repository] ********************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. +Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. 
+ +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************** +changed: [web1] + +TASK [docker : Install Docker packages] ******************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] *********************************************** +ok: [web1] + +TASK [docker : Add remote user to docker group] ************************************************************ +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] ****************************************** +ok: [web1] + +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] ********************************************* +skipping: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** +skipping: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +skipping: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +skipping: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +skipping: [web1] + +TASK [web_app : Ensure Docker Hub credentials are set] ***************************************************** +ok: [web1] => { + "changed": false, + "msg": "All assertions passed" +} + +TASK [web_app : Log in to Docker Hub] ********************************************************************** +ok: [web1] + +TASK [web_app : Create application directory] ************************************************************** +ok: 
[web1] + +TASK [web_app : Template docker-compose file] ************************************************************** +ok: [web1] + +TASK [web_app : Remove existing container with same name] ************************************************** +changed: [web1] + +TASK [web_app : Pull Docker image] ************************************************************************* +ok: [web1] + +TASK [web_app : Deploy with docker compose] **************************************************************** +changed: [web1] + +TASK [web_app : Wait for application port] ***************************************************************** +ok: [web1] + +TASK [web_app : Pause for application to start listening] ************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [web1] + +TASK [web_app : Check health endpoint] ********************************************************************* +ok: [web1] + +PLAY RECAP ************************************************************************************************* +web1 : ok=20 changed=3 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 +``` + +**Scenario 2: Wipe only** +```bash +ansible-playbook playbooks/deploy.yml \ + -e "web_app_wipe=true" \ + --tags web_app_wipe --ask-vault-pass +# Only wipe runs; deployment is skipped (tag not matched) +``` +```bash +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] + +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] 
********************************************* +changed: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** +changed: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +changed: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +changed: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +ok: [web1] => { + "msg": "Application devops-app wiped successfully" +} + +PLAY RECAP ************************************************************************************************* +web1 : ok=7 changed=4 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 +``` + +**Scenario 3: Clean reinstall (wipe + deploy)** +```bash +ansible-playbook playbooks/deploy.yml \ + -e "web_app_wipe=true" --ask-vault-pass +# Wipe runs first, then fresh deployment +``` + +```bash +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] + +TASK [docker : Install dependencies for Docker] ************************************************************ +ok: [web1] + +TASK [docker : Add Docker GPG key] ************************************************************************* +ok: [web1] + +TASK [docker : Add Docker repository] ********************************************************************** +[WARNING]: Deprecation warnings can be disabled by setting `deprecation_warnings=False` in ansible.cfg. +[DEPRECATION WARNING]: INJECT_FACTS_AS_VARS default to `True` is deprecated, top-level facts will not be auto injected after the change. This feature will be removed from ansible-core version 2.24. 
+Origin: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/docker/tasks/main.yml:23:15 + +21 - name: Add Docker repository +22 ansible.builtin.apt_repository: +23 repo: >- + ^ column 15 + +Use `ansible_facts["fact_name"]` (no `ansible_` prefix) instead. + +ok: [web1] + +TASK [docker : Update Apt cache after adding Docker repo] ************************************************** +changed: [web1] + +TASK [docker : Install Docker packages] ******************************************************************** +ok: [web1] + +TASK [docker : Ensure Docker service is enabled and started] *********************************************** +ok: [web1] + +TASK [docker : Add remote user to docker group] ************************************************************ +ok: [web1] + +TASK [docker : Install python3-docker for Ansible Docker modules] ****************************************** +ok: [web1] + +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] ********************************************* +ok: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** +ok: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +ok: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +changed: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +ok: [web1] => { + "msg": "Application devops-app wiped successfully" +} + +TASK [web_app : Ensure Docker Hub credentials are set] 
***************************************************** +ok: [web1] => { + "changed": false, + "msg": "All assertions passed" +} + +TASK [web_app : Log in to Docker Hub] ********************************************************************** +ok: [web1] + +TASK [web_app : Create application directory] ************************************************************** +changed: [web1] + +TASK [web_app : Template docker-compose file] ************************************************************** +changed: [web1] + +TASK [web_app : Remove existing container with same name] ************************************************** +ok: [web1] + +TASK [web_app : Pull Docker image] ************************************************************************* +ok: [web1] + +TASK [web_app : Deploy with docker compose] **************************************************************** +changed: [web1] + +TASK [web_app : Wait for application port] ***************************************************************** +ok: [web1] + +TASK [web_app : Pause for application to start listening] ************************************************** +Pausing for 5 seconds +(ctrl+C then 'C' = continue early, ctrl+C then 'A' = abort) +ok: [web1] + +TASK [web_app : Check health endpoint] ********************************************************************* +ok: [web1] + +PLAY RECAP ************************************************************************************************* +web1 : ok=25 changed=5 unreachable=0 failed=0 skipped=0 rescued=0 ignored=0 +``` + +**Scenario 4: Safety — tag without variable** +```bash +ansible-playbook playbooks/deploy.yml --tags web_app_wipe --ask-vault-pass +# Wipe tasks skipped by when condition (web_app_wipe=false) +``` + +```bash +Vault password: + +PLAY [Deploy application] ********************************************************************************** + +TASK [Gathering Facts] ************************************************************************************* +ok: [web1] 
+ +TASK [web_app : Include wipe tasks] ************************************************************************ +included: /Users/arinazimina/Library/Mobile Documents/com~apple~CloudDocs/Study/Third year/spring/DevOps/DevOps-Core-Course/ansible/roles/web_app/tasks/wipe.yml for web1 + +TASK [web_app : Stop and remove containers via docker compose] ********************************************* +skipping: [web1] + +TASK [web_app : Remove docker-compose file] **************************************************************** +skipping: [web1] + +TASK [web_app : Remove application directory] ************************************************************** +skipping: [web1] + +TASK [web_app : Remove Docker image (optional cleanup)] **************************************************** +skipping: [web1] + +TASK [web_app : Log wipe completion] *********************************************************************** +skipping: [web1] + +PLAY RECAP ************************************************************************************************* +web1 : ok=2 changed=0 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 +``` + +### Research Questions + +**Q: Why use both variable AND tag?** +A: Double safety. The variable (default `false`) is the main lock: a normal `ansible-playbook deploy.yml` run applies no tag filter, so the wipe tasks are reached but skipped by the `when` condition (Scenario 1), and running `--tags web_app_wipe` without setting the variable is equally harmless (Scenario 4). The tag adds selectivity: a run limited to other tags never reaches the wipe tasks even if the variable is set, and `--tags web_app_wipe` together with the variable runs the wipe without redeploying (Scenario 2). Wipe executes only when the variable is true AND the tag is not filtered out. + +**Q: What's the difference between `never` tag and this approach?** +A: The `never` tag is a special Ansible tag: a task tagged `never` is skipped unless one of its tags (`never` itself or another tag on the same task) is explicitly requested with `--tags`. Our approach uses a custom tag + variable, which is more flexible: a plain run with `-e "web_app_wipe=true"` performs wipe and deployment together for clean-reinstall scenarios. With the `never` tag a plain run can never include the wipe — the tag always has to be requested explicitly.
+ +**Q: Why must wipe logic come BEFORE deployment in main.yml?** +A: For the clean-reinstall scenario (`-e "web_app_wipe=true"` without `--tags`). Tasks execute top-to-bottom: wipe removes the old app, then deployment installs fresh. If wipe came after, we'd deploy and then immediately destroy the fresh deployment. + +**Q: When would you want clean reinstallation vs. rolling update?** +A: Clean reinstall for major version changes, corrupted state, configuration schema changes, or debugging. Rolling update for minor patches and config tweaks — it is faster and avoids downtime. + +**Q: How would you extend this to wipe Docker images and volumes too?** +A: Add `docker rmi` for images (already included), add `docker volume prune -f` or remove specific volumes with `docker volume rm`. Add `docker compose down -v` to remove named volumes defined in compose. + +--- + +## Task 4: CI/CD (3 pts) + +### Workflow architecture + +File: `.github/workflows/ansible-deploy.yml` + +``` +Push to ansible/** → Lint job (ansible-lint) → Deploy job (ansible-playbook) → Verify (curl) +``` + +### Jobs + +1. **`lint`** — runs on `ubuntu-latest`, installs ansible + ansible-lint, lints all playbooks +2.
**`deploy`** — needs lint to pass, installs ansible and collections, sets up SSH, runs playbook with vault password, verifies deployment with curl + +### Path filters + +```yaml +paths: + - 'ansible/**' + - '!ansible/docs/**' # skip docs-only changes + - '.github/workflows/ansible-deploy.yml' +``` + +### Required GitHub Secrets + +| Secret | Purpose | +|--------|---------| +| `ANSIBLE_VAULT_PASSWORD` | Decrypt `group_vars/all.yml` | +| `SSH_PRIVATE_KEY` | SSH to target VM | +| `VM_HOST` | Target VM IP address | + +### Status badge + +Added to `ansible/README.md`: + +``` +[![Ansible Deployment](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/ansible-deploy.yml/badge.svg)](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/ansible-deploy.yml) +``` + +![img](image.png) + +### Research Questions + +**Q: What are the security implications of storing SSH keys in GitHub Secrets?** +A: GitHub Secrets are encrypted at rest and only exposed to workflows during execution. Risks: anyone with write access to the repo can create workflows that read secrets; secrets may leak in logs if echoed. Mitigation: use a dedicated SSH key with minimal privileges on the target VM, limit repo access, never echo secrets in workflow steps. + +**Q: How would you implement staging -> production pipeline?** +A: Use separate inventory files (`hosts_staging.ini`, `hosts_production.ini`) and separate jobs. Staging deploys first, runs integration tests, then production deploys only if staging succeeds. Use GitHub environments with required reviewers for production. + +**Q: What would you add to make rollbacks possible?** +A: Pin Docker image tags (not `latest`) with version numbers. Keep previous docker-compose.yml as a backup. On rollback, deploy with the previous tag. Alternatively, use blue-green deployment — keep old container running on a different port until new one is verified. + +**Q: How does self-hosted runner improve security?** +A: Self-hosted runner runs inside your infrastructure — SSH keys never leave the network. No secrets stored in GitHub (runner already has access).
Faster execution (no SSH overhead). Downside: you must maintain the runner and secure the VM it runs on. + +--- + +## Task 5: Documentation + +This file serves as the complete Lab 6 documentation. + +--- + +## Summary + +### What was accomplished + +- Refactored all roles with blocks (rescue/always) and tags for selective execution +- Migrated from `docker run` to Docker Compose with Jinja2 templates +- Implemented role dependencies (web_app depends on docker) +- Added double-gated wipe logic (variable + tag) +- Created CI/CD workflow with linting, deployment, and verification +- Added status badge to README + +### Key learnings + +- Blocks enable error handling in Ansible (similar to try/catch) +- Tags allow running subsets of tasks without modifying playbooks +- Docker Compose templates make deployments declarative and reproducible +- Double-gating (variable + tag) prevents accidental destructive operations +- CI/CD with path filters avoids unnecessary deployments + diff --git a/ansible/docs/image.png b/ansible/docs/image.png new file mode 100644 index 0000000000..d9f0ca9b8c Binary files /dev/null and b/ansible/docs/image.png differ diff --git a/ansible/group_vars/all.yml b/ansible/group_vars/all.yml new file mode 100644 index 0000000000..0708fbc432 --- /dev/null +++ b/ansible/group_vars/all.yml @@ -0,0 +1,17 @@ +$ANSIBLE_VAULT;1.1;AES256 +33626238666363353934303662313165373836383335623363396435393665316637393238353332 +6539646232313037626665366561623830616534326137630a623964306538623234353461333164 +32353931616162633635336234363033393563323864323961346661303233343366643966613261 +6233653366306237330a393762353630323330653032373233623938353162303161643336623664 +33653161333630643365343463656130363234643135643234353935616264333034356530306639 +33313364343039323134383564616137626462646437366539366637303862333234663862643064 +39626332623165623733353737613437313336343535626632383533353536393237396236353739 
+34383465616234623765393130346331373139313738363332393937643666333666623436393938 +37353233396131343536663432353037313564653461366138366166366462323237326333366637 +61623737316632623637376663613239613364653939396663306631393038343639383064323938 +33313865323735366630373263333231353864386438353535303936313935623131666232346232 +35373934323961373939366665373232306263613338356239333663656264643163316538633031 +37653539386531326166656463643264313439396437303563666337636238623533633435353863 +64646365303261636265363836656638316331333131343539623033663566343631613963393233 +36333862303563356437623430633762626463346331343731353066656538643466663135303533 +34656537653937363032 diff --git a/ansible/group_vars/all.yml.example b/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..e3e7713898 --- /dev/null +++ b/ansible/group_vars/all.yml.example @@ -0,0 +1,14 @@ +# Copy this file to all.yml and encrypt with Ansible Vault: +# ansible-vault create group_vars/all.yml +# Then paste the content below and save. 
+ +# Docker Hub credentials (required for deploy) +dockerhub_username: your-dockerhub-username +dockerhub_password: your-dockerhub-password-or-access-token + +# Application configuration +app_name: devops-app +docker_image: "{{ dockerhub_username }}/devops-info-service" +docker_tag: latest +app_port: 8000 +app_internal_port: 5001 diff --git a/ansible/inventory/hosts.ini b/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..1e401bb77e --- /dev/null +++ b/ansible/inventory/hosts.ini @@ -0,0 +1,7 @@ +# Replace YOUR_VM_IP with your VM's public IP (Pulumi: pulumi stack output public_ip | Terraform: terraform output vm_public_ip) +# Replace ubuntu with your SSH user if different +[webservers] +web1 ansible_host=89.169.129.155 ansible_user=ubuntu + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/ansible/inventory/yandex_cloud.yml b/ansible/inventory/yandex_cloud.yml new file mode 100644 index 0000000000..748308b120 --- /dev/null +++ b/ansible/inventory/yandex_cloud.yml @@ -0,0 +1,14 @@ +plugin: yandex.cloud.yandex_compute + +folder_id: "{{ lookup('env', 'YANDEX_FOLDER_ID') }}" + +auth_kind: token + +compose: + ansible_host: network_interfaces[0].primary_v4_address.one_to_one_nat.address + ansible_user: ubuntu + +# Group by zone (optional) +keyed_groups: + - key: zone_id + prefix: zone diff --git a/ansible/playbooks/deploy-monitoring.yml b/ansible/playbooks/deploy-monitoring.yml new file mode 100644 index 0000000000..50deedbcaf --- /dev/null +++ b/ansible/playbooks/deploy-monitoring.yml @@ -0,0 +1,8 @@ +- name: Deploy Loki monitoring stack + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml + + roles: + - monitoring diff --git a/ansible/playbooks/deploy.yml b/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..b771ad6b9e --- /dev/null +++ b/ansible/playbooks/deploy.yml @@ -0,0 +1,9 @@ +--- +- name: Deploy application + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml 
+ + roles: + - web_app diff --git a/ansible/playbooks/deploy_all.yml b/ansible/playbooks/deploy_all.yml new file mode 100644 index 0000000000..acfcb5cec3 --- /dev/null +++ b/ansible/playbooks/deploy_all.yml @@ -0,0 +1,28 @@ +- name: Deploy All Applications + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml + + tasks: + - name: Deploy Python app + ansible.builtin.include_role: + name: web_app + vars: + app_name: devops-python + docker_image: "{{ dockerhub_username }}/devops-info-service" + docker_tag: latest + app_port: 8000 + app_internal_port: 5001 + compose_project_dir: "/opt/devops-python" + + - name: Deploy Bonus (Go) app + ansible.builtin.include_role: + name: web_app + vars: + app_name: devops-go + docker_image: "{{ dockerhub_username }}/devops-info-service-go" + docker_tag: latest + app_port: 8001 + app_internal_port: 8080 + compose_project_dir: "/opt/devops-go" diff --git a/ansible/playbooks/deploy_bonus.yml b/ansible/playbooks/deploy_bonus.yml new file mode 100644 index 0000000000..38a35d16a6 --- /dev/null +++ b/ansible/playbooks/deploy_bonus.yml @@ -0,0 +1,9 @@ +- name: Deploy Bonus (Go) Application + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml + - ../vars/app_bonus.yml + + roles: + - web_app diff --git a/ansible/playbooks/deploy_python.yml b/ansible/playbooks/deploy_python.yml new file mode 100644 index 0000000000..ae48eedbcd --- /dev/null +++ b/ansible/playbooks/deploy_python.yml @@ -0,0 +1,9 @@ +- name: Deploy Python Application + hosts: webservers + become: true + vars_files: + - ../group_vars/all.yml + - ../vars/app_python.yml + + roles: + - web_app diff --git a/ansible/playbooks/provision.yml b/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/ansible/requirements.yml 
b/ansible/requirements.yml new file mode 100644 index 0000000000..483ed156a5 --- /dev/null +++ b/ansible/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: community.general + - name: community.docker diff --git a/ansible/roles/common/defaults/main.yml b/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..50323f8b6a --- /dev/null +++ b/ansible/roles/common/defaults/main.yml @@ -0,0 +1,13 @@ +--- +common_use_yandex_mirror: true +common_packages: + - python3-pip + - curl + - git + - vim + - htop + - unzip + - ca-certificates + - gnupg + - lsb-release +common_timezone: "Europe/Moscow" diff --git a/ansible/roles/common/tasks/main.yml b/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..df67dbc722 --- /dev/null +++ b/ansible/roles/common/tasks/main.yml @@ -0,0 +1,73 @@ +--- +- name: Install packages and configure system + become: true + tags: + - packages + - common + block: + - name: Force Apt to use IPv4 only + ansible.builtin.copy: + content: 'Acquire::ForceIPv4 "true";' + dest: /etc/apt/apt.conf.d/99force-ipv4 + owner: root + group: root + mode: "0644" + when: common_use_yandex_mirror | default(false) | bool + + - name: Configure Yandex mirror for Ubuntu + ansible.builtin.template: + src: sources.list.yandex.j2 + dest: /etc/apt/sources.list + owner: root + group: root + mode: "0644" + when: common_use_yandex_mirror | default(false) | bool + + - name: Update Apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + update_cache_retries: 10 + update_cache_retry_max_delay: 30 + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + rescue: + - name: Retry Apt cache update on failure + ansible.builtin.apt: + update_cache: true + register: common_apt_fix_result + changed_when: false + failed_when: false + + - name: Retry installing common packages after fix + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + 
always: + - name: Log package installation completion + ansible.builtin.copy: + content: "common role packages block completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible_common_packages.log + mode: "0644" + +- name: Configure users and system settings + become: true + tags: + - users + - common + block: + - name: Set timezone + community.general.timezone: + name: "{{ common_timezone }}" + + always: + - name: Log user configuration completion + ansible.builtin.copy: + content: "common role users block completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible_common_users.log + mode: "0644" diff --git a/ansible/roles/common/templates/sources.list.yandex.j2 b/ansible/roles/common/templates/sources.list.yandex.j2 new file mode 100644 index 0000000000..0cc8779a45 --- /dev/null +++ b/ansible/roles/common/templates/sources.list.yandex.j2 @@ -0,0 +1,4 @@ +# Ubuntu {{ ansible_facts['distribution_release'] }} — Yandex mirror (often works better from Yandex Cloud) +deb http://mirror.yandex.ru/ubuntu/ {{ ansible_facts['distribution_release'] }} main restricted universe multiverse +deb http://mirror.yandex.ru/ubuntu/ {{ ansible_facts['distribution_release'] }}-updates main restricted universe multiverse +deb http://mirror.yandex.ru/ubuntu/ {{ ansible_facts['distribution_release'] }}-security main restricted universe multiverse diff --git a/ansible/roles/docker/defaults/main.yml b/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..b91e3451e0 --- /dev/null +++ b/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,4 @@ +--- +docker_install_compose: false +# User(s) to add to docker group (e.g. 
[ubuntu]) +docker_users: [] diff --git a/ansible/roles/docker/handlers/main.yml b/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..07aa0eb290 --- /dev/null +++ b/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/ansible/roles/docker/tasks/main.yml b/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..6c208d6b7b --- /dev/null +++ b/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,88 @@ +--- +- name: Install Docker engine + become: true + tags: + - docker_install + - docker + block: + - name: Install dependencies for Docker + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + + - name: Add Docker GPG key + ansible.builtin.apt_key: + url: https://download.docker.com/linux/ubuntu/gpg + state: present + + - name: Add Docker repository + ansible.builtin.apt_repository: + repo: >- + deb [arch={{ ansible_architecture | lower | + replace('x86_64', 'amd64') | + replace('aarch64', 'arm64') }}] + https://download.docker.com/linux/ubuntu + {{ ansible_distribution_release }} stable + state: present + filename: docker + notify: Restart docker + + - name: Update Apt cache after adding Docker repo + ansible.builtin.apt: + update_cache: true + cache_valid_time: 0 + + - name: Install Docker packages + ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + state: present + notify: Restart docker + + rescue: + - name: Wait before retrying after GPG/network failure + ansible.builtin.pause: + seconds: 10 + + - name: Retry Apt update after failure + ansible.builtin.apt: + update_cache: true + changed_when: false + + - name: Retry Docker packages installation + ansible.builtin.apt: + name: + - docker-ce + - docker-ce-cli + - containerd.io + state: present + + always: + - name: Ensure Docker service is enabled and started + ansible.builtin.service: + name: docker + state: 
started + enabled: true + failed_when: false + +- name: Configure Docker environment + become: true + tags: + - docker_config + - docker + block: + - name: Add remote user to docker group + ansible.builtin.user: + name: "{{ ansible_user }}" + groups: docker + append: true + + - name: Install python3-docker for Ansible Docker modules + ansible.builtin.apt: + name: python3-docker + state: present diff --git a/ansible/roles/monitoring/defaults/main.yml b/ansible/roles/monitoring/defaults/main.yml new file mode 100644 index 0000000000..2fe319db96 --- /dev/null +++ b/ansible/roles/monitoring/defaults/main.yml @@ -0,0 +1,30 @@ +monitoring_project_dir: /opt/monitoring +loki_version: "3.0.0" +promtail_version: "3.0.0" +grafana_version: "12.3.1" +prometheus_version: "v3.10.0" +loki_http_port: 3100 +grafana_http_port: 3000 +promtail_http_port: 9080 +prometheus_http_port: 9090 +loki_retention_hours: 168 # 7 days +dockerhub_username_for_monitoring: "{{ dockerhub_username | default('your-dockerhub-username') }}" +grafana_admin_password: "{{ vault_grafana_admin_password | default('admin') }}" +# Prometheus config +prometheus_retention_time: "15d" +prometheus_retention_size: "10GB" +prometheus_scrape_interval: "15s" + +prometheus_targets: + - job: "prometheus" + targets: ["localhost:9090"] + - job: "loki" + targets: ["loki:3100"] + path: "/metrics" + - job: "grafana" + targets: ["grafana:3000"] + path: "/metrics" + - job: "app" + targets: ["app-python:5001"] + path: "/metrics" + diff --git a/ansible/roles/monitoring/meta/main.yml b/ansible/roles/monitoring/meta/main.yml new file mode 100644 index 0000000000..6ad37f8159 --- /dev/null +++ b/ansible/roles/monitoring/meta/main.yml @@ -0,0 +1,2 @@ +dependencies: + - role: docker diff --git a/ansible/roles/monitoring/tasks/deploy.yml b/ansible/roles/monitoring/tasks/deploy.yml new file mode 100644 index 0000000000..403f3bbfcd --- /dev/null +++ b/ansible/roles/monitoring/tasks/deploy.yml @@ -0,0 +1,33 @@ +- name: Deploy Loki stack with 
Docker Compose v2 + community.docker.docker_compose_v2: + project_src: "{{ monitoring_project_dir }}" + state: present + register: monitoring_compose_result + +- name: Wait for Prometheus to be ready + ansible.builtin.uri: + url: "http://127.0.0.1:{{ prometheus_http_port }}/-/ready" + status_code: 200 + register: prometheus_ready + retries: 12 + delay: 5 + until: (prometheus_ready.status | default(0)) == 200 + + +- name: Wait for Loki to be ready + ansible.builtin.uri: + url: "http://127.0.0.1:{{ loki_http_port }}/ready" + status_code: 200 + register: loki_ready + retries: 12 + delay: 5 + until: (loki_ready.status | default(0)) == 200 + +- name: Wait for Grafana to be ready + ansible.builtin.uri: + url: "http://127.0.0.1:{{ grafana_http_port }}/api/health" + status_code: 200 + register: grafana_ready + retries: 15 + delay: 5 + until: (grafana_ready.status | default(0)) == 200 diff --git a/ansible/roles/monitoring/tasks/main.yml b/ansible/roles/monitoring/tasks/main.yml new file mode 100644 index 0000000000..2d14580824 --- /dev/null +++ b/ansible/roles/monitoring/tasks/main.yml @@ -0,0 +1,5 @@ +- name: Setup monitoring (dirs and configs) + ansible.builtin.include_tasks: setup.yml + +- name: Deploy and verify Loki stack + ansible.builtin.include_tasks: deploy.yml diff --git a/ansible/roles/monitoring/tasks/setup.yml b/ansible/roles/monitoring/tasks/setup.yml new file mode 100644 index 0000000000..4f8f08d80b --- /dev/null +++ b/ansible/roles/monitoring/tasks/setup.yml @@ -0,0 +1,58 @@ +- name: Create monitoring directory structure + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "0755" + loop: + - "{{ monitoring_project_dir }}" + - "{{ monitoring_project_dir }}/loki" + - "{{ monitoring_project_dir }}/promtail" + - "{{ monitoring_project_dir }}/prometheus" + - "{{ monitoring_project_dir }}/grafana/provisioning/datasources" + - "{{ monitoring_project_dir }}/grafana/provisioning/dashboards" + - "{{ monitoring_project_dir 
}}/grafana/provisioning/dashboards-json" + +- name: Template Loki config + ansible.builtin.template: + src: loki-config.yml.j2 + dest: "{{ monitoring_project_dir }}/loki/config.yml" + mode: "0644" + +- name: Template Promtail config + ansible.builtin.template: + src: promtail-config.yml.j2 + dest: "{{ monitoring_project_dir }}/promtail/config.yml" + mode: "0644" + +- name: Template Prometheus config + ansible.builtin.template: + src: prometheus.yml.j2 + dest: "{{ monitoring_project_dir }}/prometheus/prometheus.yml" + mode: "0644" + +- name: Install Grafana datasources provisioning + ansible.builtin.copy: + src: grafana-datasources.yml + dest: "{{ monitoring_project_dir }}/grafana/provisioning/datasources/datasources.yml" + mode: "0644" + +- name: Install Grafana dashboards provisioning + ansible.builtin.copy: + src: grafana-dashboards.yml + dest: "{{ monitoring_project_dir }}/grafana/provisioning/dashboards/dashboards.yml" + mode: "0644" + +- name: Install Grafana dashboards JSON + ansible.builtin.copy: + src: "{{ item }}" + dest: "{{ monitoring_project_dir }}/grafana/provisioning/dashboards-json/{{ item }}" + mode: "0644" + loop: + - grafana-app-metrics-dashboard.json + - grafana-loki-overview-dashboard.json + +- name: Template docker-compose for monitoring stack + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ monitoring_project_dir }}/docker-compose.yml" + mode: "0644" diff --git a/ansible/roles/monitoring/templates/docker-compose.yml.j2 b/ansible/roles/monitoring/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..1bdf75efd8 --- /dev/null +++ b/ansible/roles/monitoring/templates/docker-compose.yml.j2 @@ -0,0 +1,149 @@ +services: + prometheus: + image: prom/prometheus:{{ prometheus_version }} + container_name: prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time={{ prometheus_retention_time }}' + - '--storage.tsdb.retention.size={{ prometheus_retention_size }}' + ports: 
+ - "{{ prometheus_http_port }}:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + + loki: + image: grafana/loki:{{ loki_version }} + container_name: loki + user: "0:0" + command: -config.file=/etc/loki/config.yml + ports: + - "{{ loki_http_port }}:3100" + labels: + logging: "promtail" + app: "loki" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/var/loki + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + + promtail: + image: grafana/promtail:{{ promtail_version }} + container_name: promtail + command: -config.file=/etc/promtail/config.yml + labels: + logging: "promtail" + app: "promtail" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + restart: unless-stopped + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + + grafana: + image: grafana/grafana:{{ grafana_version }} + container_name: grafana + ports: + - "{{ grafana_http_port }}:3000" + labels: + logging: "promtail" + app: "grafana" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ADMIN_PASSWORD={{ grafana_admin_password }} + - GF_USERS_ALLOW_SIGN_UP=false + volumes: + - grafana-data:/var/lib/grafana + - ./grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro + - 
./grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro + - ./grafana/provisioning/dashboards-json:/var/lib/grafana/dashboards:ro + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 15s + deploy: + resources: + limits: + cpus: '1.0' + memory: 512M + + app-python: + image: {{ dockerhub_username_for_monitoring }}/devops-info-service:latest + container_name: devops-python + ports: + - "8000:5001" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5001/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + + app-go: + image: {{ dockerhub_username_for_monitoring }}/devops-info-service-go:latest + container_name: devops-go + ports: + - "8001:8080" + networks: + - logging + labels: + logging: "promtail" + app: "devops-go" + restart: unless-stopped + +networks: + logging: + driver: bridge + +volumes: + loki-data: + grafana-data: + prometheus-data: diff --git a/ansible/roles/monitoring/templates/loki-config.yml.j2 b/ansible/roles/monitoring/templates/loki-config.yml.j2 new file mode 100644 index 0000000000..60717fcdb3 --- /dev/null +++ b/ansible/roles/monitoring/templates/loki-config.yml.j2 @@ -0,0 +1,50 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + grpc_listen_port: 9096 + +common: + instance_addr: 127.0.0.1 + path_prefix: /var/loki + storage: + filesystem: + chunks_directory: /var/loki/chunks + rules_directory: /var/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_scheduler: + max_outstanding_requests_per_tenant: 2048 + +schema_config: + configs: + - from: "2024-01-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: 
/var/loki/chunks + tsdb_shipper: + active_index_directory: /var/loki/tsdb-index + cache_location: /var/loki/tsdb-cache + +limits_config: + retention_period: {{ loki_retention_hours }}h + +compactor: + working_directory: /var/loki/compactor + retention_enabled: true + compaction_interval: 10m + apply_retention_interval: 10m + delete_request_store: filesystem + +analytics: + reporting_enabled: false diff --git a/ansible/roles/monitoring/templates/promtail-config.yml.j2 b/ansible/roles/monitoring/templates/promtail-config.yml.j2 new file mode 100644 index 0000000000..5b51aedba4 --- /dev/null +++ b/ansible/roles/monitoring/templates/promtail-config.yml.j2 @@ -0,0 +1,24 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_label_app'] + target_label: 'app' diff --git a/ansible/roles/web_app/defaults/main.yml b/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..5e50caf78b --- /dev/null +++ b/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,18 @@ +--- +# Application settings +app_name: devops-app +docker_image: "{{ dockerhub_username }}/devops-info-service" +docker_tag: latest +app_port: 8000 +# Python devops-info-service listens on 5001 inside container (see app_python/app.py) +app_internal_port: 5001 +app_restart_policy: unless-stopped +app_env: {} + +# Docker Compose settings +compose_project_dir: "/opt/{{ app_name }}" + +# Wipe Logic Control — default: do not wipe +# Wipe only: ansible-playbook deploy.yml -e "web_app_wipe=true" --tags web_app_wipe +# Clean install: ansible-playbook deploy.yml -e 
"web_app_wipe=true" +web_app_wipe: false diff --git a/ansible/roles/web_app/handlers/main.yml b/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..9ea684d80e --- /dev/null +++ b/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart app via docker compose + ansible.builtin.command: + cmd: "docker compose restart" + chdir: "{{ compose_project_dir }}" + become: true diff --git a/ansible/roles/web_app/meta/main.yml b/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..cb7d8e0460 --- /dev/null +++ b/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker diff --git a/ansible/roles/web_app/tasks/main.yml b/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..fd64707b4b --- /dev/null +++ b/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,90 @@ +--- +- name: Include wipe tasks + ansible.builtin.include_tasks: wipe.yml + tags: + - web_app_wipe + +- name: Deploy application with Docker Compose + become: true + tags: + - app_deploy + - compose + block: + - name: Ensure Docker Hub credentials are set + ansible.builtin.assert: + that: + - dockerhub_username is defined + - dockerhub_password is defined + fail_msg: > + Define dockerhub_username and dockerhub_password in group_vars/all.yml. 
+ + - name: Log in to Docker Hub + community.docker.docker_login: + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + registry: https://index.docker.io/v1/ + no_log: true + + - name: Create application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: directory + mode: "0755" + + - name: Template docker-compose file + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ compose_project_dir }}/docker-compose.yml" + mode: "0644" + + - name: Remove existing container with same name + community.docker.docker_container: + name: "{{ app_name }}" + state: absent + failed_when: false + + - name: Free host port if occupied by another container (e.g. devops-app when deploying devops-python) + ansible.builtin.shell: | + cid=$(docker ps -q --filter "publish={{ app_port }}") + [ -n "$cid" ] && docker rm -f $cid || true + changed_when: false + failed_when: false + + - name: Pull Docker image + ansible.builtin.command: + cmd: "docker compose pull" + chdir: "{{ compose_project_dir }}" + changed_when: false + + - name: Deploy with docker compose + ansible.builtin.command: + cmd: "docker compose up -d --force-recreate" + chdir: "{{ compose_project_dir }}" + changed_when: true + + - name: Wait for application port + ansible.builtin.wait_for: + port: "{{ app_port }}" + delay: 3 + timeout: 30 + + - name: Pause for application to start listening + ansible.builtin.pause: + seconds: 5 + + - name: Check health endpoint + ansible.builtin.uri: + url: "http://127.0.0.1:{{ app_port }}/health" + return_content: true + timeout: 15 + register: health_result + changed_when: false + + rescue: + - name: Log deployment failure + ansible.builtin.debug: + msg: "Deployment of {{ app_name }} failed. 
Check logs with: docker compose -f {{ compose_project_dir }}/docker-compose.yml logs" + + - name: Fail after logging + ansible.builtin.fail: + msg: "Deployment failed for {{ app_name }}" diff --git a/ansible/roles/web_app/tasks/wipe.yml b/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..da635fae12 --- /dev/null +++ b/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,31 @@ +--- +- name: Wipe web application + become: true + tags: + - web_app_wipe + when: web_app_wipe | default(false) | bool + block: + - name: Stop and remove containers via docker compose + ansible.builtin.command: + cmd: docker compose down --remove-orphans + chdir: "{{ compose_project_dir }}" + failed_when: false + + - name: Remove docker-compose file + ansible.builtin.file: + path: "{{ compose_project_dir }}/docker-compose.yml" + state: absent + + - name: Remove application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: absent + + - name: Remove Docker image (optional cleanup) + ansible.builtin.command: + cmd: "docker rmi {{ docker_image }}:{{ docker_tag }}" + failed_when: false + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Application {{ app_name }} wiped successfully" diff --git a/ansible/roles/web_app/templates/docker-compose.yml.j2 b/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..119fe1eb06 --- /dev/null +++ b/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,15 @@ +--- +# version is obsolete in Compose V2, omitted to avoid warning +services: + {{ app_name }}: + image: {{ docker_image }}:{{ docker_tag }} + container_name: {{ app_name }} + ports: + - "{{ app_port }}:{{ app_internal_port }}" +{% if app_env and app_env | length > 0 %} + environment: +{% for key, value in app_env.items() %} + {{ key }}: "{{ value }}" +{% endfor %} +{% endif %} + restart: {{ app_restart_policy }} diff --git a/ansible/vars/app_bonus.yml b/ansible/vars/app_bonus.yml new file mode 
100644 index 0000000000..52579fd9fa --- /dev/null +++ b/ansible/vars/app_bonus.yml @@ -0,0 +1,7 @@ +--- +app_name: devops-go +docker_image: "{{ dockerhub_username }}/devops-info-service-go" +docker_tag: latest +app_port: 8001 +app_internal_port: 8080 +compose_project_dir: "/opt/{{ app_name }}" diff --git a/ansible/vars/app_python.yml b/ansible/vars/app_python.yml new file mode 100644 index 0000000000..b27fb59ada --- /dev/null +++ b/ansible/vars/app_python.yml @@ -0,0 +1,7 @@ +--- +app_name: devops-python +docker_image: "{{ dockerhub_username }}/devops-info-service" +docker_tag: latest +app_port: 8000 +app_internal_port: 5001 +compose_project_dir: "/opt/{{ app_name }}" diff --git a/app_go/.dockerignore b/app_go/.dockerignore new file mode 100644 index 0000000000..f31cd17b67 --- /dev/null +++ b/app_go/.dockerignore @@ -0,0 +1,46 @@ +# Git +.git +.gitignore +.gitattributes + +# Documentation +README.md +docs/ +*.md + +# Build artifacts +devops-info-service +devops-info-service-* +*.exe + +# IDE and editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Test files +*_test.go +test/ +tests/ + +# CI/CD files +.github/ +.gitlab-ci.yml +Jenkinsfile + +# Docker files +Dockerfile* +.dockerignore + +# Screenshots and media +screenshots/ +*.jpg +*.png +*.gif + +# Temporary files +*.tmp +*.log diff --git a/app_go/Dockerfile b/app_go/Dockerfile new file mode 100644 index 0000000000..9c80b79e40 --- /dev/null +++ b/app_go/Dockerfile @@ -0,0 +1,42 @@ +# Stage 1: Builder +FROM golang:1.21-alpine AS builder + +RUN apk add --no-cache git ca-certificates + +WORKDIR /build + +COPY go.mod go.sum* ./ +RUN go mod download + +COPY main.go ./ + +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags="-s -w" \ + -a -installsuffix cgo \ + -o devops-info-service \ + main.go + +# Stage 2: Runtime +FROM alpine:3.19 + +RUN apk --no-cache add ca-certificates + +RUN addgroup -g 1000 appuser && \ + adduser -D -u 1000 -G appuser appuser + +WORKDIR /app + +COPY --from=builder 
/build/devops-info-service . + +RUN chown -R appuser:appuser /app + +USER appuser + +EXPOSE 8080 + +ENV PORT=8080 + +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1 + +CMD ["./devops-info-service"] diff --git a/app_go/README.md b/app_go/README.md new file mode 100644 index 0000000000..13d1709879 --- /dev/null +++ b/app_go/README.md @@ -0,0 +1,325 @@ +# DevOps Info Service - Go + +[![Go CI](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/go-ci.yml/badge.svg)](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/go-ci.yml) +[![codecov](https://codecov.io/github/Arino4kaMyr/DevOps-Core-Course/graph/badge.svg?flag=go)](https://codecov.io/github/Arino4kaMyr/DevOps-Core-Course?flag=go) + +A production-ready web service implemented in Go that provides comprehensive information about itself and its runtime environment. This is the compiled language version of the DevOps Info Service, built using Go's standard `net/http` package. + +## Overview + +The DevOps Info Service (Go version) is a RESTful API that exposes system information, runtime metrics, and health status. This implementation demonstrates the benefits of compiled languages: small binary size, fast execution, and single-file deployment. + +**Key Features:** +- System information endpoint (`GET /`) +- Health check endpoint (`GET /health`) +- Configurable via environment variables +- Single binary deployment (no runtime dependencies) +- Fast startup and execution + +## Prerequisites + +- **Go:** 1.21 or higher +- **Git:** For dependency management (if using external packages) + +## Installation + +### Option 1: Build from Source + +1. **Clone the repository:** + ```bash + git clone https://github.com/Arino4kaMyr/DevOps-Core-Course.git + cd DevOps-Core-Course/app_go + ``` + +2. **Build the application:** + ```bash + go build -o devops-info-service main.go + ``` + +3. 
**Run the binary:** + ```bash + ./devops-info-service + ``` + +### Option 2: Install Directly + +```bash +go install ./... +``` + +The binary will be installed to `$GOPATH/bin` (or `$HOME/go/bin` by default). + +## Running the Application + +### Basic Usage + +Run the application with default settings (port: `8080`): + +```bash +# If built locally +./devops-info-service + +# Or run directly with go +go run main.go +``` + +### Custom Configuration + +Configure the application using environment variables: + +```bash +# Custom port +PORT=3000 ./devops-info-service + +# Or with go run +PORT=3000 go run main.go +``` + +The service will be available at `http://0.0.0.0:<PORT>` + +## API Endpoints + +### `GET /` + +Returns comprehensive service and system information. + +**Response:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Go net/http" + }, + "system": { + "hostname": "my-laptop", + "platform": "darwin", + "platform_version": "go1.21.0", + "architecture": "arm64", + "cpu_count": 8, + "go_version": "go1.21.0" + }, + "runtime": { + "uptime_seconds": 3600.5, + "uptime_human": "1 hour", + "current_time": "2026-01-31T17:30:00.000Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/7.81.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +**Example Request:** +```bash +curl http://localhost:8080/ +``` + +### `GET /health` + +Simple health check endpoint for monitoring and Kubernetes probes. 
+ +**Response:** +```json +{ + "status": "healthy", + "timestamp": "2026-01-31T17:30:00.000Z", + "uptime_seconds": 3600.5 +} +``` + +**Status Codes:** +- `200 OK`: Service is healthy + +**Example Request:** +```bash +curl http://localhost:8080/health +``` + +## Configuration + +The application can be configured using the following environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `PORT` | `8080` | Port number to listen on | + +## Build Process + +### Development Build + +```bash +go build -o devops-info-service main.go +``` + +### Production Build (Optimized) + +```bash +# Build with optimizations and smaller binary size +go build -ldflags="-s -w" -o devops-info-service main.go +``` + +**Build Flags:** +- `-ldflags="-s -w"`: Strip debug information and symbol table (reduces binary size) + +### Cross-Platform Build + +```bash +# Build for Linux +GOOS=linux GOARCH=amd64 go build -o devops-info-service-linux main.go + +# Build for Windows +GOOS=windows GOARCH=amd64 go build -o devops-info-service.exe main.go + +# Build for macOS (ARM) +GOOS=darwin GOARCH=arm64 go build -o devops-info-service-darwin-arm64 main.go +``` + +## Binary Size Comparison + +### Go Binary Size + +```bash +$ ls -lh devops-info-service +-rwxr-xr-x 1 user staff 8.5M devops-info-service + +# With optimizations +$ go build -ldflags="-s -w" -o devops-info-service main.go +$ ls -lh devops-info-service +-rwxr-xr-x 1 user staff 6.2M devops-info-service +``` + +### Python Comparison + +- **Go binary:** ~6-8 MB (single file, no dependencies) +- **Python application:** Requires Python runtime (~50-100 MB) + dependencies (~10-20 MB) = ~60-120 MB total + +**Advantages of Go:** +- Single binary deployment (no runtime installation needed) +- Faster startup time +- Lower memory footprint +- Better suited for containerized deployments (smaller images) + +## Project Structure + +``` +app_go/ +├── main.go # Main application +├── go.mod # Go module definition 
+├── README.md # This file +└── docs/ # Documentation + ├── LAB01.md # Lab submission documentation + ├── GO.md # Language justification + └── screenshots/ # Screenshots and proof of work +``` + +## Dependencies + +This implementation uses only Go's standard library: +- `net/http` - HTTP server and client +- `encoding/json` - JSON encoding/decoding +- `os` - Operating system interface +- `runtime` - Runtime information +- `time` - Time operations +- `fmt` - Formatted I/O +- `strings` - String manipulation + +No external dependencies required! See `go.mod` for module definition. + +## Development + +### Unit Tests and Coverage + +```bash +# Run tests +go test -v ./... + +# Run tests with coverage +go test -coverprofile=coverage.out ./... +go tool cover -func=coverage.out +``` + +### Testing + +Test the endpoints using curl: + +```bash +# Test main endpoint +curl http://localhost:8080/ | jq + +# Test health endpoint +curl http://localhost:8080/health | jq +``` + +Or use a browser to visit: +- `http://localhost:8080/` +- `http://localhost:8080/health` + + +## Docker + +The application is available as a containerized Docker image using multi-stage builds for minimal size and maximum security. + +### Running with Docker + +Pull and run the image: + +```bash +docker pull <dockerhub-username>/devops-go-multistage:latest +docker run -d -p 8080:8080 --name devops-go <dockerhub-username>/devops-go-multistage:latest +``` + +### Building Locally + +Build the multi-stage Docker image: + +```bash +docker build -t devops-go-multistage:latest . 
+``` + +Run the container: + +```bash +docker run -d -p 8080:8080 --name devops-go devops-go-multistage:latest +``` + +### Testing the Container + +```bash +# Health check +curl http://localhost:8080/health + +# Service information +curl http://localhost:8080/ | jq +``` + +### Docker Image Features + +- **Multi-Stage Build**: Separate build and runtime stages for minimal size +- **Size**: ~27MB (92% smaller than single-stage build) +- **Security**: Runs as non-root user, minimal attack surface +- **Base**: Alpine Linux 3.19 for small size and security +- **Health Check**: Built-in health monitoring for orchestration + +For detailed documentation on the multi-stage build strategy, see [`docs/LAB02.md`](docs/LAB02.md). + +--- + +## Advantages of Go Implementation + +1. **Single Binary**: No runtime dependencies, easy deployment +2. **Fast Compilation**: Quick build times for rapid iteration +3. **Small Binary Size**: Efficient for containerized deployments +4. **Fast Execution**: Compiled code runs faster than interpreted languages +5. **Concurrent by Design**: Built-in goroutines for future scalability +6. 
**Cross-Platform**: Easy to build for multiple platforms + diff --git a/app_go/coverage.out b/app_go/coverage.out new file mode 100644 index 0000000000..e2520abbc5 --- /dev/null +++ b/app_go/coverage.out @@ -0,0 +1,32 @@ +mode: set +devops-info-service/main.go:65.27,67.16 2 1 +devops-info-service/main.go:67.16,69.3 1 0 +devops-info-service/main.go:70.2,70.17 1 1 +devops-info-service/main.go:73.43,79.15 5 1 +devops-info-service/main.go:79.15,81.17 2 1 +devops-info-service/main.go:81.17,83.4 1 1 +devops-info-service/main.go:84.3,84.30 1 1 +devops-info-service/main.go:86.2,86.17 1 1 +devops-info-service/main.go:86.17,88.19 2 1 +devops-info-service/main.go:88.19,90.4 1 0 +devops-info-service/main.go:91.3,91.30 1 1 +devops-info-service/main.go:93.2,93.33 1 1 +devops-info-service/main.go:93.33,95.16 2 1 +devops-info-service/main.go:95.16,97.4 1 1 +devops-info-service/main.go:98.3,98.30 1 1 +devops-info-service/main.go:101.2,101.34 1 1 +devops-info-service/main.go:104.42,106.14 2 1 +devops-info-service/main.go:106.14,108.3 1 1 +devops-info-service/main.go:109.2,110.14 2 1 +devops-info-service/main.go:110.14,112.3 1 1 +devops-info-service/main.go:113.2,114.50 2 1 +devops-info-service/main.go:114.50,116.3 1 1 +devops-info-service/main.go:117.2,117.11 1 1 +devops-info-service/main.go:120.58,158.2 4 1 +devops-info-service/main.go:160.60,172.2 5 1 +devops-info-service/main.go:174.53,176.33 2 1 +devops-info-service/main.go:176.33,178.3 1 1 +devops-info-service/main.go:179.2,179.54 1 1 +devops-info-service/main.go:182.13,187.16 4 0 +devops-info-service/main.go:187.16,189.3 1 0 +devops-info-service/main.go:191.2,191.36 1 0 diff --git a/app_go/docs/GO.md b/app_go/docs/GO.md new file mode 100644 index 0000000000..2aef5fead7 --- /dev/null +++ b/app_go/docs/GO.md @@ -0,0 +1,61 @@ +# Why Go? + +Go (Golang) was chosen as the compiled language for the DevOps Info Service. Here's why: + +## Key Advantages + +### 1. 
Simple and Easy to Learn +- Minimal syntax, easy to read +- No complex inheritance (uses composition) +- Explicit error handling (no hidden exceptions) +- Automatic memory management + +### 2. Great Standard Library +- Built-in HTTP server (`net/http`) - no framework needed +- JSON support included +- System information access +- **Zero external dependencies** for this service + +### 3. Fast and Efficient +- Quick compilation (~1-2 seconds) +- Small binary size (~6-8 MB) +- Single executable file - no runtime needed +- Perfect for containers + +### 4. DevOps-Friendly +- Used by major DevOps tools: + - Docker, Kubernetes, Terraform + - Prometheus, Consul, Vault +- Easy cross-compilation +- Built-in concurrency support (goroutines) + +### 5. Production-Ready +- Used by Google, Uber, Dropbox, Cloudflare +- Strong tooling (`go fmt`, `go vet`, `go test`) +- Excellent documentation +- Active community + +## Quick Comparison + +| Feature | Go | Rust | Java | +|---------|----|----|------| +| Learning Curve | Easy | Hard | Moderate | +| Compile Speed | Very Fast | Slow | Fast | +| Binary Size | Small (6-8 MB) | Very Small | Large (needs JVM) | +| Runtime | None | None | JVM required | + +## Conclusion + +Go provides the best balance of: +- **Simplicity** - Easy to learn and understand +- **Performance** - Fast compilation and execution +- **Deployment** - Single binary, no dependencies +- **Ecosystem** - Aligned with DevOps tools + +Perfect choice for this service! + +## Resources + +- [Go Official Website](https://go.dev/) +- [Go Documentation](https://go.dev/doc/) +- [Go Standard Library](https://pkg.go.dev/std) diff --git a/app_go/docs/LAB01.md b/app_go/docs/LAB01.md new file mode 100644 index 0000000000..60badf5891 --- /dev/null +++ b/app_go/docs/LAB01.md @@ -0,0 +1,137 @@ +# Lab 01 - Go Implementation + +Go implementation of the DevOps Info Service (bonus task). Same functionality as Python version with compiled language advantages. 
+ +## Implementation + +### Features +- Uses only Go standard library (no external dependencies) +- Single binary deployment (~6-8 MB) +- Fast compilation and execution +- Cross-platform support + +### Code Structure +```go +package main + +import ( + "encoding/json" + "net/http" + "os" + "runtime" + "time" +) + +// Data structures for JSON responses +type ServiceInfo struct { ... } +type HealthResponse struct { ... } + +// Global start time for uptime +var startTime = time.Now() + +// Handlers +func mainHandler(w http.ResponseWriter, r *http.Request) { ... } +func healthHandler(w http.ResponseWriter, r *http.Request) { ... } +``` + +## Build + +### Development +```bash +go build -o devops-info-service main.go +``` +Size: ~8.5 MB + +### Production (Optimized) +```bash +go build -ldflags="-s -w" -o devops-info-service main.go +``` +Size: ~6.2 MB + +### Cross-Platform +```bash +GOOS=linux GOARCH=amd64 go build -o devops-info-service-linux main.go +GOOS=windows GOARCH=amd64 go build -o devops-info-service.exe main.go +``` + +## API Endpoints + +### `GET /` +Returns service and system information. + +**Response:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "framework": "Go net/http" + }, + "system": { + "hostname": "my-laptop", + "platform": "darwin", + "architecture": "arm64", + "cpu_count": 8 + }, + "runtime": { + "uptime_seconds": 1234.56, + "uptime_human": "20 minutes, 34 seconds" + } +} +``` + +### `GET /health` +Health check endpoint for monitoring. 
+ +**Response:** +```json +{ + "status": "healthy", + "timestamp": "2026-01-31T17:30:00.000Z", + "uptime_seconds": 1234.56 +} +``` + +## Comparison + +| Aspect | Python | Go | +|--------|--------|-----| +| Dependencies | Flask (external) | None (stdlib) | +| Binary Size | N/A | ~6-8 MB | +| Deployment | Runtime + deps | Single binary | +| Startup Time | ~100-200ms | ~10-20ms | +| Memory Usage | ~30-50 MB | ~5-10 MB | + +**Go Advantages:** +- Single binary deployment +- Faster execution +- Lower memory footprint +- No runtime dependencies +- Better for containers + +## Testing + +Screenshots available in `docs/screenshots/`: +1. Build process +2. Main endpoint response +3. Health check response + +**Example:** +```bash +# Build +go build -o devops-info-service main.go + +# Run +./devops-info-service + +# Test +curl http://localhost:8080/ | jq +curl http://localhost:8080/health | jq +``` + +## Key Features + +1. **System Information**: Uses `runtime` package for system info +2. **Uptime Calculation**: Tracks start time and formats human-readable +3. **Client IP Detection**: Handles proxy headers correctly +4. **Environment Variables**: Configurable via `PORT` env var diff --git a/app_go/docs/LAB02.md b/app_go/docs/LAB02.md new file mode 100644 index 0000000000..89dfce2a5b --- /dev/null +++ b/app_go/docs/LAB02.md @@ -0,0 +1,194 @@ +# Lab 2 — Multi-Stage Docker Build + +## Overview + +Multi-stage builds solve a critical problem: **build environment is much larger than runtime needs**. 
+ +**Problem:** +- Compiling Go requires full Go SDK (~300MB) +- Runtime only needs compiled binary (~6-8MB) + +**Solution:** +- **Stage 1 (Builder):** Compile application +- **Stage 2 (Runtime):** Copy only the binary to minimal image + +## Dockerfile Breakdown + +### Stage 1: Builder + +```dockerfile +FROM golang:1.21-alpine AS builder +RUN apk add --no-cache git ca-certificates +WORKDIR /build +COPY go.mod go.sum* ./ +RUN go mod download +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build \ + -ldflags="-s -w" \ + -a -installsuffix cgo \ + -o devops-info-service \ + main.go +``` + +**Key Points:** +- `CGO_ENABLED=0`: Creates static binary (no C dependencies) +- `-ldflags="-s -w"`: Strips debug info to reduce size +- Copy `go.mod` before source code for better caching + +### Stage 2: Runtime + +```dockerfile +FROM alpine:3.19 +RUN apk --no-cache add ca-certificates +RUN addgroup -g 1000 appuser && \ + adduser -D -u 1000 -G appuser appuser +WORKDIR /app +COPY --from=builder /build/devops-info-service . +RUN chown -R appuser:appuser /app +USER appuser +EXPOSE 8080 +ENV PORT=8080 +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD wget --no-verbose --tries=1 --spider http://localhost:8080/health || exit 1 +CMD ["./devops-info-service"] +``` + +**Key Points:** +- Alpine base (~7MB) with shell for debugging +- Non-root user for security +- Health check for monitoring + +## Size Comparison + +### Terminal Output + +**Check final image size:** +```bash +$ docker images devops-go-multistage +IMAGE ID DISK USAGE CONTENT SIZE +devops-go-multistage:latest 8b972207d848 27.3MB 7.8MB +``` + +**Note:** Builder stage (`golang:1.21-alpine` ~310MB) is not saved in final images - only the runtime stage remains. 
+ +**Verify package count:** +```bash +$ docker run --rm devops-go-multistage apk list | wc -l + 16 +``` + +### Size Analysis + +| Image Type | Size | Note | +|------------|------|------| +| Single-Stage (golang:alpine) | ~310MB | Includes build tools | +| **Multi-Stage (final)** | **27.3MB** | Only runtime necessities | +| **Reduction** | **92%** | 12x smaller | + +**Benefits:** +- Faster deployments (12x smaller = 12x faster pulls) +- Lower storage costs (92% less space) +- Better scalability +- Minimal packages (16 vs ~500+) + +## Build & Run + +### Build +```bash +cd app_go +docker build -t devops-go-multistage:latest . +``` + +### Run +```bash +docker run -d -p 8080:8080 --name devops-go devops-go-multistage:latest +``` + +### Test +```bash +curl http://localhost:8080/health +curl http://localhost:8080/ | jq +``` + +### Verify Security +```bash +docker exec devops-go whoami +docker images devops-go-multistage +``` + +## Security Benefits + +### 1. Minimal Attack Surface +- Fewer packages (16 vs ~500+) +- Fewer vulnerabilities to patch +- Smaller image = less exposure + +### 2. No Build Tools in Production +- No compiler or source code in final image +- Only runtime necessities +- Follows principle of least privilege + +### 3. Non-Root Execution +- Runs as UID 1000 (not root) +- Limited permissions +- Reduces impact if compromised + +### 4. Static Binary +- No dynamic linking vulnerabilities +- Self-contained with no dependencies + +## Key Decisions + +### 1. Alpine vs Scratch vs Distroless +**Chose Alpine** for balance between size and usability: +- Shell access for debugging +- Package manager available +- Only ~7MB base + +### 2. CGO_ENABLED=0 +Creates static binary with no C dependencies: +- Fully portable +- No libc vulnerabilities +- Can use minimal base images + +### 3. Layer Ordering +Copy `go.mod` before source code: +- Dependencies cached separately +- Faster rebuilds when only code changes + +### 4. 
Build Flags +`-ldflags="-s -w"` strips debug info: +- ~20% size reduction +- Acceptable for production + +## Why Multi-Stage Builds Matter + +**Problem:** Compiled languages need large build tools but small runtime +- Build: Requires compiler (~300MB+) +- Runtime: Only needs binary (~6-8MB) + +**Solution:** Multi-stage builds separate these phases +- Stage 1: Build with full toolchain +- Stage 2: Copy only binary to minimal image + +**Impact:** +- 92% size reduction +- 12x faster deployments +- Lower storage costs +- Better security + +## Summary + +### Achievements +- Multi-stage Dockerfile with 92% size reduction +- Security hardening (non-root user, minimal attack surface) +- Optimized layer caching +- Production-ready with health checks + +### Best Practices Applied +- Non-root user execution +- Minimal base image (Alpine) +- Static binary compilation +- Layer caching optimization +- Health check for monitoring + diff --git a/app_go/docs/LAB03.md b/app_go/docs/LAB03.md new file mode 100644 index 0000000000..69e7154ffb --- /dev/null +++ b/app_go/docs/LAB03.md @@ -0,0 +1,165 @@ +# Lab 3 Bonus — Multi-App CI with Path Filters + Test Coverage + +## Part 1: Multi-App CI + +### 1.1 Second CI Workflow: Go + +**File:** `.github/workflows/go-ci.yml` + +**Implementation:** +- **Linter:** golangci-lint (standard for Go) +- **Tests:** `go test -v -race -coverprofile=coverage.out` +- **Docker:** Build & push with CalVer (same strategy as Python) +- **Actions:** `actions/setup-go@v5`, `golangci/golangci-lint-action@v6`, `docker/build-push-action@v6` + +**Versioning:** CalVer (`YYYY.MM.BUILD`) aligned with Python workflow. 
+ +**Docker image:** `mirana18/devops-info-service-go` + +### 1.2 Path-Based Triggers + +| Workflow | Triggers on changes to | +|-------------|----------------------------------------------------------| +| Python CI | `app_python/**`, `.github/workflows/python-ci.yml` | +| Go CI | `app_go/**`, `.github/workflows/go-ci.yml` | + +**No workflow runs** when only these change: +- `docs/`, `labs/`, `lectures/` +- `README.md`, `.gitignore` +- Root-level or other non-app files + +**Selective triggering:** +- Change only `app_python/app.py` → Python CI runs, Go CI does not +- Change only `app_go/main.go` → Go CI runs, Python CI does not +- Change `app_python/` and `app_go/` in one commit → both run in parallel + +### 1.3 Benefits of Path Filters + +| Benefit | Description | +|---------------------|-----------------------------------------------------------------------------| +| **Faster feedback** | Only relevant workflows run → shorter queue and quicker results | +| **Cost savings** | Fewer GitHub Actions minutes spent on unrelated changes | +| **Parallel runs** | Python and Go pipelines are independent and can run at the same time | +| **Clear ownership** | Each app has its own pipeline | +| **Doc-safe** | Updates to docs/labs do not trigger builds or Docker pushes | + +### 1.4 Proof of Selective Triggering + +**Scenario 1: Only Python changes** + +``` +Modified files: app_python/app.py +→ Python CI: runs +→ Go CI: skipped (no matching paths) +``` + +**Scenario 2: Only Go changes** + +``` +Modified files: app_go/main.go +→ Python CI: skipped +→ Go CI: runs +``` + +**Scenario 3: Both apps changed** + +``` +Modified files: app_python/app.py, app_go/main.go +→ Python CI: runs +→ Go CI: runs (in parallel) +``` + +--- + +## Part 2: Test Coverage + +### 2.1 Coverage Tools + +| App | Tool | Command | Output | +|--------|---------------|------------------------------------------------------|---------------------| +| Python | pytest-cov | `pytest --cov=. 
--cov-report=xml --cov-fail-under=70` | `coverage.xml` | +| Go | go test | `go test -coverprofile=coverage.out ./...` | `coverage.out` | + +### 2.2 Codecov Integration + +- **Service:** codecov.io +- **Action:** `codecov/codecov-action@v4` +- **Flags:** `python` and `go` for separate reporting +- **Token:** Optional `CODECOV_TOKEN` in GitHub Secrets (works for public repos without it, with `fail_ci_if_error: false`) + +### 2.3 Coverage Badges + +Added to README files: + +- **app_python/README.md:** Python CI badge + Codecov (python flag) +- **app_go/README.md:** Go CI badge + Codecov (go flag) + +**Badge URLs:** +``` +https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg +https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/go-ci.yml/badge.svg +https://codecov.io/gh/Arino4kaMyr/DevOps-Core-Course/graph/badge.svg?flag=python +https://codecov.io/gh/Arino4kaMyr/DevOps-Core-Course/graph/badge.svg?flag=go +``` + +### 2.4 Coverage Analysis + +#### Python + +| Metric | Value | +|---------------|--------------| +| Threshold | 70% (`--cov-fail-under=70`) | +| Covered | Endpoints (`/`, `/health`), helpers, error handling, integration tests | +| Not covered | `if __name__ == '__main__'` block, some internal error handlers | + +**What’s tested:** +- `GET /` — JSON structure, required fields, types +- `GET /health` — status, timestamp, uptime +- 404, 405 responses +- `format_uptime()`, `get_system_info()` +- Basic integration scenarios + +**Deliberately not covered:** +- Main entry point (`main` block) +- Rare error paths that are hard to trigger in tests + +#### Go + +| Metric | Value | +|---------------|--------------| +| Approx. 
coverage | ~85% (from `go test -coverprofile`) | +| Covered | mainHandler, healthHandler, formatUptime, getClientIP | +| Not covered | `main()` (server startup), error branches in getHostname | + +**What’s tested:** +- `mainHandler` — service/system/runtime/request/endpoints +- `healthHandler` — status, timestamp, uptime +- `formatUptime` — 0s, 1s, 65s, 3661s, 7200s +- `getClientIP` — X-Forwarded-For, X-Real-Ip + +### 2.5 Coverage Threshold in CI + +**Python:** CI fails if coverage drops below 70%. + +```yaml +pytest --cov=. --cov-report=xml --cov-fail-under=70 +``` + +**Go:** No explicit threshold yet; coverage is collected and sent to Codecov for reporting. + +--- + +## Summary + +| Requirement | Status | +|-------------------------------------|--------| +| Second workflow for Go | ✅ `go-ci.yml` | +| Path filters for Python | ✅ `app_python/**` | +| Path filters for Go | ✅ `app_go/**` | +| Both workflows run in parallel | ✅ Independent triggers | +| Coverage tool (pytest-cov, go test) | ✅ | +| Coverage reports in CI | ✅ | +| Codecov integration | ✅ | +| Coverage badges in README | ✅ | +| Coverage threshold (Python ≥70%) | ✅ | +| Documentation of coverage | ✅ | diff --git a/app_go/docs/screenshots/01-main-endpoint.jpg b/app_go/docs/screenshots/01-main-endpoint.jpg new file mode 100644 index 0000000000..fb65c21880 Binary files /dev/null and b/app_go/docs/screenshots/01-main-endpoint.jpg differ diff --git a/app_go/docs/screenshots/02-health-check.jpg b/app_go/docs/screenshots/02-health-check.jpg new file mode 100644 index 0000000000..9204375b09 Binary files /dev/null and b/app_go/docs/screenshots/02-health-check.jpg differ diff --git a/app_go/docs/screenshots/03-formatted-output.jpg b/app_go/docs/screenshots/03-formatted-output.jpg new file mode 100644 index 0000000000..1cbcf99c61 Binary files /dev/null and b/app_go/docs/screenshots/03-formatted-output.jpg differ diff --git a/app_go/go.mod b/app_go/go.mod new file mode 100644 index 0000000000..307ce0d1c5 --- 
/dev/null
+++ b/app_go/go.mod
@@ -0,0 +1,3 @@
+module devops-info-service
+
+go 1.21
diff --git a/app_go/main.go b/app_go/main.go
new file mode 100644
index 0000000000..9ff077591f
--- /dev/null
+++ b/app_go/main.go
@@ -0,0 +1,269 @@
+package main
+
+import (
+	"encoding/json"
+	"fmt"
+	"log"
+	"math"
+	"net"
+	"net/http"
+	"os"
+	"runtime"
+	"strings"
+	"time"
+)
+
+// ServiceInfo is the JSON document returned by GET /.
+type ServiceInfo struct {
+	Service   Service    `json:"service"`
+	System    System     `json:"system"`
+	Runtime   Runtime    `json:"runtime"`
+	Request   Request    `json:"request"`
+	Endpoints []Endpoint `json:"endpoints"`
+}
+
+// Service holds static metadata about this application.
+type Service struct {
+	Name        string `json:"name"`
+	Version     string `json:"version"`
+	Description string `json:"description"`
+	Framework   string `json:"framework"`
+}
+
+// System describes the host and the Go runtime of the process.
+type System struct {
+	Hostname        string `json:"hostname"`
+	Platform        string `json:"platform"`
+	PlatformVersion string `json:"platform_version"`
+	Architecture    string `json:"architecture"`
+	CPUCount        int    `json:"cpu_count"`
+	GoVersion       string `json:"go_version"`
+}
+
+// Runtime reports process uptime and the current UTC time.
+type Runtime struct {
+	UptimeSeconds float64 `json:"uptime_seconds"`
+	UptimeHuman   string  `json:"uptime_human"`
+	CurrentTime   string  `json:"current_time"`
+	Timezone      string  `json:"timezone"`
+}
+
+// Request echoes selected properties of the incoming HTTP request.
+type Request struct {
+	ClientIP  string `json:"client_ip"`
+	UserAgent string `json:"user_agent"`
+	Method    string `json:"method"`
+	Path      string `json:"path"`
+}
+
+// Endpoint describes one route exposed by the service.
+type Endpoint struct {
+	Path        string `json:"path"`
+	Method      string `json:"method"`
+	Description string `json:"description"`
+}
+
+// HealthResponse is the JSON document returned by GET /health.
+type HealthResponse struct {
+	Status        string  `json:"status"`
+	Timestamp     string  `json:"timestamp"`
+	UptimeSeconds float64 `json:"uptime_seconds"`
+}
+
+// startTime is captured at process start and used to compute uptime.
+var startTime = time.Now()
+
+// statusCapturingResponseWriter records the status code a handler writes so
+// that the access log can report it.
+type statusCapturingResponseWriter struct {
+	http.ResponseWriter
+	status int
+}
+
+// WriteHeader remembers statusCode and forwards it to the wrapped writer.
+func (w *statusCapturingResponseWriter) WriteHeader(statusCode int) {
+	w.status = statusCode
+	w.ResponseWriter.WriteHeader(statusCode)
+}
+
+// withAccessLog wraps next and, once it returns, logs the request method,
+// path, response status, client IP and handling duration.
+func withAccessLog(next http.HandlerFunc) http.HandlerFunc {
+	return func(w http.ResponseWriter, r *http.Request) {
+		start := time.Now()
+		// 200 is what net/http sends when a handler never calls WriteHeader.
+		sw := &statusCapturingResponseWriter{ResponseWriter: w, status: http.StatusOK}
+
+		next(sw, r)
+
+		durationMs := float64(time.Since(start).Nanoseconds()) / 1e6
+		log.Printf(
+			"request completed method=%s path=%s status=%d client_ip=%s duration_ms=%.2f",
+			r.Method,
+			r.URL.Path,
+			sw.status,
+			getClientIP(r),
+			durationMs,
+		)
+	}
+}
+
+// getHostname returns the OS hostname, or "unknown" if it cannot be read.
+func getHostname() string {
+	hostname, err := os.Hostname()
+	if err != nil {
+		return "unknown"
+	}
+	return hostname
+}
+
+// formatUptime renders seconds as e.g. "1 hour, 1 minute, 5 seconds".
+// Components equal to zero are omitted; if all are zero it returns
+// "0 seconds".
+func formatUptime(seconds float64) string {
+	hours := int(seconds) / 3600
+	minutes := int(seconds) % 3600 / 60
+	secs := int(seconds) % 60
+
+	parts := []string{}
+	if hours > 0 {
+		part := fmt.Sprintf("%d hour", hours)
+		if hours != 1 {
+			part += "s"
+		}
+		parts = append(parts, part)
+	}
+	if minutes > 0 {
+		part := fmt.Sprintf("%d minute", minutes)
+		if minutes != 1 {
+			part += "s"
+		}
+		parts = append(parts, part)
+	}
+	if secs > 0 || len(parts) == 0 {
+		part := fmt.Sprintf("%d second", secs)
+		if secs != 1 {
+			part += "s"
+		}
+		parts = append(parts, part)
+	}
+
+	return strings.Join(parts, ", ")
+}
+
+// getClientIP returns a best-effort client address for r: the first entry of
+// X-Forwarded-For, else X-Real-Ip, else the connection's remote address
+// without its port.
+func getClientIP(r *http.Request) string {
+	if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" {
+		// The header is a list ("client, proxy1, proxy2"); entries are
+		// usually separated by ", ", so trim the surrounding whitespace.
+		return strings.TrimSpace(strings.Split(forwarded, ",")[0])
+	}
+	if realIP := r.Header.Get("X-Real-Ip"); realIP != "" {
+		return strings.TrimSpace(realIP)
+	}
+	// SplitHostPort also removes the brackets around IPv6 literals
+	// ("[::1]:8080" -> "::1"), which cutting at the last ":" did not.
+	host, _, err := net.SplitHostPort(r.RemoteAddr)
+	if err != nil {
+		return r.RemoteAddr
+	}
+	return host
+}
+
+// mainHandler serves GET / with service, system, runtime and request details.
+// The "/" pattern matches every path no other handler claims, so anything
+// other than "/" itself is answered with 404.
+func mainHandler(w http.ResponseWriter, r *http.Request) {
+	if r.URL.Path != "/" {
+		http.NotFound(w, r)
+		return
+	}
+
+	uptimeSeconds := time.Since(startTime).Seconds()
+
+	info := ServiceInfo{
+		Service: Service{
+			Name:        "devops-info-service",
+			Version:     "1.0.0",
+			Description: "DevOps course info service",
+			Framework:   "Go net/http",
+		},
+		System: System{
+			Hostname:        getHostname(),
+			Platform:        runtime.GOOS,
+			PlatformVersion: runtime.Version(),
+			Architecture:    runtime.GOARCH,
+			CPUCount:        runtime.NumCPU(),
+			GoVersion:       runtime.Version(),
+		},
+		Runtime: Runtime{
+			UptimeSeconds: roundFloat(uptimeSeconds, 2),
+			UptimeHuman:   formatUptime(uptimeSeconds),
+			CurrentTime:   time.Now().UTC().Format("2006-01-02T15:04:05.000Z"),
+			Timezone:      "UTC",
+		},
+		Request: Request{
+			ClientIP:  getClientIP(r),
+			UserAgent: r.Header.Get("User-Agent"),
+			Method:    r.Method,
+			Path:      r.URL.Path,
+		},
+		Endpoints: []Endpoint{
+			{Path: "/", Method: "GET", Description: "Service information"},
+			{Path: "/health", Method: "GET", Description: "Health check"},
+		},
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	if err := json.NewEncoder(w).Encode(info); err != nil {
+		log.Printf("failed to encode response: %v", err)
+	}
+}
+
+// healthHandler serves GET /health with a status, timestamp and uptime.
+func healthHandler(w http.ResponseWriter, r *http.Request) {
+	uptimeSeconds := time.Since(startTime).Seconds()
+
+	health := HealthResponse{
+		Status:        "healthy",
+		Timestamp:     time.Now().UTC().Format("2006-01-02T15:04:05.000Z"),
+		UptimeSeconds: roundFloat(uptimeSeconds, 2),
+	}
+
+	w.Header().Set("Content-Type", "application/json")
+	w.WriteHeader(http.StatusOK)
+	if err := json.NewEncoder(w).Encode(health); err != nil {
+		log.Printf("failed to encode health response: %v", err)
+	}
+}
+
+// roundFloat rounds val to precision decimal places, half away from zero.
+func roundFloat(val float64, precision int) float64 {
+	multiplier := math.Pow10(precision)
+	return math.Round(val*multiplier) / multiplier
+}
+
+// main registers the handlers and serves on $PORT (default 8080).
+func main() {
+	http.HandleFunc("/", withAccessLog(mainHandler))
+	http.HandleFunc("/health", withAccessLog(healthHandler))
+
+	port := os.Getenv("PORT")
+	if port == "" {
+		port = "8080"
+	}
+
+	// http.ListenAndServe uses a server without timeouts; bound header reads
+	// so that slow clients cannot hold connections open indefinitely.
+	server := &http.Server{
+		Addr:              ":" + port,
+		ReadHeaderTimeout: 10 * time.Second,
+	}
+
+	log.Printf("starting server port=%s", port)
+	if err := server.ListenAndServe(); err != nil {
+		log.Fatalf("server failed: %v", err)
+	}
+}
diff --git a/app_go/main_test.go b/app_go/main_test.go
new file mode 100644
index 0000000000..a989a49b1c
--- /dev/null
+++ b/app_go/main_test.go
@@ -0,0 +1,151 @@
+package main
+
+import (
+	"encoding/json"
+	"net/http"
+	"net/http/httptest"
+	"strings"
+	"testing"
+)
+
+func TestMainHandler(t *testing.T) {
+	req := httptest.NewRequest("GET", "/", nil)
+	req.Header.Set("User-Agent", "TestClient/1.0")
+	w := httptest.NewRecorder()
+
+	mainHandler(w, req)
+
+	resp := w.Result()
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("expected status 200, got %d", resp.StatusCode)
+	}
+
+	contentType := resp.Header.Get("Content-Type")
+	if !strings.Contains(contentType, "application/json") {
+		t.Errorf("expected JSON content type, got %s", contentType)
+	}
+
+	var info ServiceInfo
+	if err := json.NewDecoder(resp.Body).Decode(&info); err != nil {
+		t.Fatalf("failed to decode JSON: %v", err)
+	}
+
+	// Verify service info
+	if info.Service.Name != "devops-info-service" {
+		t.Errorf("expected service name 'devops-info-service', got %s", info.Service.Name)
+	}
+	if info.Service.Version != "1.0.0" {
+		t.Errorf("expected version '1.0.0', got %s", info.Service.Version)
+	}
+	if info.Service.Framework != "Go net/http" {
+		t.Errorf("expected framework 'Go net/http', got %s", info.Service.Framework)
+	}
+
+	// Verify system info
+	if info.System.Hostname == "" {
+		t.Error("expected non-empty hostname")
+	}
+	if info.System.Platform == "" {
+		t.Error("expected non-empty platform")
+	}
+	if info.System.CPUCount <= 0 {
+		t.Errorf("expected positive CPU count, got %d", info.System.CPUCount)
+	}
+	if info.System.GoVersion == "" {
+		t.Error("expected non-empty Go version")
+	}
+
+	// Verify runtime info
+	if info.Runtime.UptimeSeconds < 0 {
+		t.Errorf("expected non-negative uptime, got %f", info.Runtime.UptimeSeconds)
+	}
+	if info.Runtime.Timezone != "UTC" {
+		t.Errorf("expected timezone 'UTC', got %s", info.Runtime.Timezone)
+	}
+
+	// Verify request info
+	if info.Request.Method != "GET" {
+		t.Errorf("expected method GET, got %s", info.Request.Method)
+	}
+	if info.Request.Path != "/" {
+		t.Errorf("expected path '/', got %s", info.Request.Path)
+	}
+
+	// Verify endpoints list
+	if len(info.Endpoints) < 2 {
+		t.Errorf("expected at least 2 endpoints, got %d", len(info.Endpoints))
+	}
+}
+
+func TestHealthHandler(t *testing.T) {
+	req := httptest.NewRequest("GET", "/health", nil)
+	w := httptest.NewRecorder()
+
+	healthHandler(w, req)
+
+	resp := w.Result()
+	if resp.StatusCode != http.StatusOK {
+		t.Errorf("expected status 200, got %d", resp.StatusCode)
+	}
+
+	var health HealthResponse
+	if err := json.NewDecoder(resp.Body).Decode(&health); err != nil {
+		t.Fatalf("failed to decode JSON: %v", err)
+	}
+
+	if health.Status != "healthy" {
+		t.Errorf("expected status 'healthy', got %s", health.Status)
+	}
+	if health.Timestamp == "" {
+		t.Error("expected non-empty timestamp")
+	}
+	if health.UptimeSeconds < 0 {
+		t.Errorf("expected non-negative uptime, got %f", health.UptimeSeconds)
+	}
+}
+
+func TestFormatUptime(t *testing.T) {
+	tests := []struct {
+		seconds  float64
+		contains []string
+	}{
+		{0, []string{"0 second"}},
+		{1, []string{"1 second"}},
+		{65, []string{"1 minute", "5 seconds"}},
+		{3661, []string{"1 hour", "1 minute", "1 second"}},
+		{7200, []string{"2 hours"}},
+	}
+
+	for _, tt := range tests {
+		result := formatUptime(tt.seconds)
+		for _, s := range tt.contains {
+			if !strings.Contains(result, s) {
+				t.Errorf("formatUptime(%f) = %q, expected to contain %q", tt.seconds, result, s)
+			}
+		}
+	}
+}
+
+func TestGetClientIP(t *testing.T) {
+	tests := []struct {
+		name   string
+		header string
+		value  string
+		want   string
+	}{
+		{"X-Forwarded-For", "X-Forwarded-For", "192.168.1.1", "192.168.1.1"},
+		{"X-Forwarded-For list", "X-Forwarded-For", " 192.168.1.1 , 10.0.0.2", "192.168.1.1"},
+		{"X-Real-Ip", "X-Real-Ip", "10.0.0.1", "10.0.0.1"},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			req := httptest.NewRequest("GET", "/", nil)
+			req.Header.Set(tt.header, tt.value)
+			got := getClientIP(req)
+			if got != tt.want {
+				t.Errorf("getClientIP() = %q, want %q", got, tt.want)
+			}
+		})
+	}
+}
diff --git a/app_python/.dockerignore b/app_python/.dockerignore
new file mode 100644
index 0000000000..460f471617
--- /dev/null
+++ b/app_python/.dockerignore
@@ -0,0 +1,44 @@
+# Python cache and compiled files
+__pycache__/
+*.py[cod] +*$py.class +*.so + +# Virtual environments +venv/ +.venv/ +env/ +ENV/ + +# IDE and editor files +.vscode/ +.idea/ +*.swp +*.swo +*~ +.DS_Store + +# Version control +.git/ +.gitignore +.gitattributes + +# Documentation and screenshots +docs/ +*.md +README.md + +# Test files +tests/ +*.pytest_cache/ +.coverage +htmlcov/ + +# Docker files +Dockerfile +.dockerignore + +# Other development files +*.log +.env +.env.* diff --git a/app_python/.gitignore b/app_python/.gitignore new file mode 100644 index 0000000000..063f8d4ed4 --- /dev/null +++ b/app_python/.gitignore @@ -0,0 +1,13 @@ +# Python +__pycache__/ +*.py[cod] +venv/ +*.log + +# IDE +.vscode/ +.idea/ + +# OS +.DS_Store + diff --git a/app_python/Dockerfile b/app_python/Dockerfile new file mode 100644 index 0000000000..ffcdc176a7 --- /dev/null +++ b/app_python/Dockerfile @@ -0,0 +1,27 @@ +FROM python:3.13-slim + +WORKDIR /app + +RUN groupadd -r appuser && \ + useradd -r -g appuser -s /bin/bash -u 1001 appuser + +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY app.py . 
+ +RUN chown -R appuser:appuser /app + +USER appuser + +EXPOSE 5001 + +ENV HOST=0.0.0.0 \ + PORT=5001 \ + DEBUG=False + +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5001/health')" || exit 1 + +CMD ["python", "app.py"] diff --git a/app_python/README.md b/app_python/README.md new file mode 100644 index 0000000000..f3217d4234 --- /dev/null +++ b/app_python/README.md @@ -0,0 +1,308 @@ +# DevOps Info Service - Python + +[![Python CI](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg)](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/python-ci.yml) +[![codecov](https://codecov.io/github/Arino4kaMyr/DevOps-Core-Course/graph/badge.svg?flag=python)](https://codecov.io/github/Arino4kaMyr/DevOps-Core-Course?flag=python) + +A production-ready web service that provides comprehensive information about itself and its runtime environment. Built with Flask framework. + +## Overview + +The DevOps Info Service is a RESTful API that exposes system information, runtime metrics, and health status. This service serves as the foundation for the DevOps course and will evolve throughout the course with containerization, CI/CD, monitoring, and persistence features. + +**Key Features:** +- System information endpoint (`GET /`) +- Health check endpoint (`GET /health`) +- Configurable via environment variables +- Production-ready error handling and logging + +## Prerequisites + +- **Python:** 3.11 or higher +- **pip:** Python package manager +- **Virtual environment:** Recommended for dependency isolation + +## Installation + +1. **Clone the repository:** + ```bash + git clone <repository-url> + cd DevOps-Core-Course/app_python + ``` + +2. **Create a virtual environment:** + ```bash + python -m venv venv + ``` + +3.
**Activate the virtual environment:** + ```bash + # On macOS/Linux: + source venv/bin/activate + + # On Windows: + venv\Scripts\activate + ``` + +4. **Install dependencies:** + ```bash + pip install -r requirements.txt + ``` + +## Running the Application + +### Basic Usage + +Run the application with default settings (host: `0.0.0.0`, port: `5001`): + +```bash +python app.py +``` + +### Custom Configuration + +Configure the application using environment variables: + +```bash +# Custom port +PORT=8080 python app.py + +# Custom host and port +HOST=127.0.0.1 PORT=3000 python app.py + +# Enable debug mode +DEBUG=true python app.py +``` + +The service will be available at `http://<HOST>:<PORT>` + +## API Endpoints + +### `GET /` + +Returns comprehensive service and system information. + +**Response:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": { + "hostname": "my-laptop", + "platform": "Darwin", + "platform_version": "25.2.0", + "architecture": "arm64", + "cpu_count": 8, + "python_version": "3.13.1" + }, + "runtime": { + "uptime_seconds": 3600.5, + "uptime_human": "1 hour, 0 minutes, 0 seconds", + "current_time": "2026-01-31T17:30:00.000Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/7.81.0", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +**Example Request:** +```bash +curl http://localhost:5001/ +``` + +### `GET /health` + +Simple health check endpoint for monitoring and Kubernetes probes.
+ +**Response:** +```json +{ + "status": "healthy", + "timestamp": "2026-01-31T17:30:00.000Z", + "uptime_seconds": 3600.5 +} +``` + +**Status Codes:** +- `200 OK`: Service is healthy + +**Example Request:** +```bash +curl http://localhost:5001/health +``` + +## Configuration + +The application can be configured using the following environment variables: + +| Variable | Default | Description | +|----------|---------|-------------| +| `HOST` | `0.0.0.0` | Host address to bind the server | +| `PORT` | `5001` | Port number to listen on | +| `DEBUG` | `False` | Enable debug mode (set to `true` to enable) | + +## Project Structure + +``` +app_python/ +├── app.py # Main application +├── requirements.txt # Python dependencies +├── .gitignore # Git ignore rules +├── README.md # This file +├── tests/ # Unit tests (for Lab 3) +│ └── __init__.py +└── docs/ # Documentation + ├── LAB01.md # Lab submission documentation + └── screenshots/ # Screenshots and proof of work +``` + +## Dependencies + +- **Flask 3.1.0** - Lightweight web framework + +See `requirements.txt` for pinned versions. + +## Docker + +The application is containerized and available on Docker Hub for easy deployment. + +### Prerequisites + +- **Docker:** 25+ or compatible version +- **Docker Hub account:** Only needed to push images (not required for local builds or for pulling public images) + +### Building the Image Locally + +Build the Docker image from source: + +```bash +cd app_python + +docker build -t <image-name>:<tag> . + +# Example: +docker build -t devops-info-service:latest .
+``` + +### Running a Container + +Run the containerized application with port mapping: + +```bash +docker run -d -p <host-port>:<container-port> --name <container-name> <image-name>:<tag> + +# Example with default settings: +docker run -d -p 5001:5001 --name devops-app devops-info-service:latest + +# Example with custom port and environment variables: +docker run -d -p 8080:5001 \ + -e PORT=5001 \ + -e DEBUG=false \ + --name devops-app \ + devops-info-service:latest +``` + +**Access the application:** +- Main endpoint: `http://localhost:5001/` +- Health check: `http://localhost:5001/health` + +### Pulling from Docker Hub + +Pull and run the pre-built image from Docker Hub: + +```bash +docker pull <username>/<image-name>:<tag> + +# Example: +docker pull mirana18/devops-info-service:latest + +# Run the pulled image +docker run -d -p 5001:5001 --name devops-app mirana18/devops-info-service:latest +``` + +### Container Management + +```bash +# View running containers +docker ps + +# View container logs +docker logs <container-name> +docker logs devops-app + +# Stop a container +docker stop <container-name> + +# Remove a container +docker rm <container-name> + +# Stop and remove in one command +docker stop devops-app && docker rm devops-app +``` + +### Image Information + +- **Base Image:** `python:3.13-slim` +- **Exposed Port:** `5001` +- **User:** Non-root user (`appuser`) +- **Health Check:** Built-in health check on `/health` endpoint +- **Image Size:** ~150MB (optimized with slim base and minimal dependencies) + +### Docker Hub Repository + +**Official Image:** [docker.io/mirana18/devops-info-service](https://hub.docker.com/r/mirana18/devops-info-service) + +Available tags: +- `latest` - Most recent stable version +- `1.0.0` - Semantic versioning tags +- `lab02` - Lab-specific versions + +## Development + +### Unit Tests and Coverage + +```bash +# Install dev dependencies +pip install -r requirements-dev.txt + +# Run tests +pytest -v + +# Run tests with coverage (70% threshold enforced in CI) +pytest --cov=.
--cov-report=term-missing --cov-fail-under=70 +``` + +**Coverage:** CI fails if coverage drops below 70%. Current coverage includes: +- All API endpoints (`GET /`, `GET /health`) +- JSON structure and required fields validation +- Error handling (404, 405) +- Helper functions (`format_uptime`, `get_system_info`) + +### Testing + +Test the endpoints using curl: + +```bash +# Test main endpoint +curl http://localhost:5001/ | jq + +# Test health endpoint +curl http://localhost:5001/health | jq +``` + +Or use a browser to visit: +- `http://localhost:5001/` +- `http://localhost:5001/health` + diff --git a/app_python/app.py b/app_python/app.py new file mode 100644 index 0000000000..860f26ff4c --- /dev/null +++ b/app_python/app.py @@ -0,0 +1,288 @@ +"""DevOps Info Service - Flask application for system information.""" +import logging +import os +import platform +import socket +import time +from datetime import datetime, timezone + +from flask import Flask, jsonify, request +from prometheus_client import CONTENT_TYPE_LATEST, Counter, Gauge, Histogram, generate_latest +from pythonjsonlogger import jsonlogger + + +def setup_json_logging(): + """Configure JSON logging for Loki/Grafana (Lab 7).""" + log_level = os.getenv("LOG_LEVEL", "INFO").upper() + root = logging.getLogger() + root.setLevel(getattr(logging, log_level, logging.INFO)) + handler = logging.StreamHandler() + formatter = jsonlogger.JsonFormatter( + # Name real LogRecord attributes here; rename_fields then emits them + # as "level" and "timestamp" in the JSON output. + "%(asctime)s %(levelname)s %(name)s %(message)s", + rename_fields={"levelname": "level", "asctime": "timestamp"}, + ) + handler.setFormatter(formatter) + root.handlers = [handler] + + +setup_json_logging() +logger = logging.getLogger(__name__) + +try: + HOST = os.getenv('HOST', '0.0.0.0') + PORT = int(os.getenv('PORT', 5001)) + DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' +except ValueError as e: + logger.error(f"Invalid environment variable: {e}") + HOST = '0.0.0.0' + PORT = 5001 + DEBUG = False + +app = Flask(__name__) +start_time = time.time() +
+http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests", + ["method", "endpoint", "status_code"], +) + +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint"], +) + +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", +) + + +@app.before_request +def log_request_start(): + """Log incoming request (context for JSON logs).""" + request.start_time = time.time() + http_requests_in_progress.inc() + logger.info( + "Request started", + extra={ + "method": request.method, + "path": request.path, + "client_ip": request.remote_addr or "unknown", + }, + ) + + +@app.after_request +def log_request_end(response): + """Log response status and duration.""" + endpoint = ( + request.url_rule.rule + if getattr(request, "url_rule", None) is not None + else request.path + ) + duration_seconds = time.time() - getattr(request, "start_time", time.time()) + duration_ms = duration_seconds * 1000 + + http_requests_total.labels( + method=request.method, + endpoint=endpoint, + status_code=str(response.status_code), + ).inc() + http_request_duration_seconds.labels( + method=request.method, + endpoint=endpoint, + ).observe(duration_seconds) + http_requests_in_progress.dec() + + logger.info( + "Request completed", + extra={ + "method": request.method, + "path": request.path, + "status_code": response.status_code, + "client_ip": request.remote_addr or "unknown", + "duration_ms": round(duration_ms, 2), + }, + ) + return response + + +@app.route("/metrics", methods=["GET"]) +def metrics(): + return generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST} + + +def format_uptime(seconds): + """Format uptime in seconds to human-readable string.""" + try: + hours = int(seconds // 3600) + minutes = int((seconds % 3600) // 60) + secs = int(seconds % 60) + + hour_str = f"{hours} hour{'s' if hours != 1 else ''}" + minute_str = 
f"{minutes} minute{'s' if minutes != 1 else ''}" + sec_str = f"{secs} second{'s' if secs != 1 else ''}" + + return f"{hour_str}, {minute_str}, {sec_str}" + except (ValueError, TypeError) as e: + logger.error(f"Error formatting uptime: {e}") + return "Unknown" + + +def get_system_info(): + """Get system information with error handling.""" + system_info = {} + + try: + system_info['hostname'] = socket.gethostname() + except (socket.error, OSError) as e: + logger.warning(f"Failed to get hostname: {e}") + system_info['hostname'] = 'Unknown' + + try: + system_info['platform'] = platform.system() + except Exception as e: + logger.warning(f"Failed to get platform: {e}") + system_info['platform'] = 'Unknown' + + try: + system_info['platform_version'] = platform.release() + except Exception as e: + logger.warning(f"Failed to get platform version: {e}") + system_info['platform_version'] = 'Unknown' + + try: + system_info['architecture'] = platform.machine() + except Exception as e: + logger.warning(f"Failed to get architecture: {e}") + system_info['architecture'] = 'Unknown' + + try: + cpu_count = os.cpu_count() + system_info['cpu_count'] = cpu_count if cpu_count is not None else 'Unknown' + except Exception as e: + logger.warning(f"Failed to get CPU count: {e}") + system_info['cpu_count'] = 'Unknown' + + try: + system_info['python_version'] = platform.python_version() + except Exception as e: + logger.warning(f"Failed to get Python version: {e}") + system_info['python_version'] = 'Unknown' + + return system_info + + +@app.route('/', methods=['GET']) +def main(): + """Main endpoint returning service and system information.""" + try: + logger.info("Main endpoint accessed", extra={"path": "/", "method": "GET"}) + uptime_seconds = time.time() - start_time + + system_info = get_system_info() + + response_data = { + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": 
system_info, + "runtime": { + "uptime_seconds": round(uptime_seconds, 2), + "uptime_human": format_uptime(uptime_seconds), + "current_time": datetime.now(timezone.utc).isoformat(), + "timezone": "UTC" + }, + "request": { + "client_ip": request.remote_addr or 'Unknown', + "user_agent": request.headers.get('User-Agent', 'Unknown'), + "method": request.method, + "path": request.path + }, + "endpoints": [ + { + "path": "/", + "method": "GET", + "description": "Service information" + }, + { + "path": "/health", + "method": "GET", + "description": "Health check" + } + ] + } + + return jsonify(response_data) + + except Exception as e: + logger.error(f"Error in main endpoint: {e}", exc_info=True) + return jsonify({ + "error": "Internal server error", + "message": str(e) + }), 500 + + +@app.route('/health', methods=['GET']) +def health_check(): + """Health check endpoint for monitoring.""" + try: + uptime_seconds = time.time() - start_time + timestamp = datetime.now(timezone.utc).isoformat( + timespec='milliseconds' + ).replace('+00:00', 'Z')  # -> 2026-01-31T17:30:00.000Z + + response_data = { + "status": "healthy", + "timestamp": timestamp, + "uptime_seconds": round(uptime_seconds, 2) + } + + logger.debug("Health check", extra={"status": "healthy"}) + return jsonify(response_data), 200 + + except Exception as e: + logger.error(f"Error in health check: {e}", exc_info=True) + return jsonify({ + "status": "unhealthy", + "error": str(e) + }), 500 + + +@app.errorhandler(404) +def not_found(error): + """Handle 404 errors.""" + logger.warning(f"404 error: {request.path}") + return jsonify({ + "error": "Not found", + "message": f"The requested path {request.path} was not found" + }), 404 + + +@app.errorhandler(500) +def internal_error(error): + """Handle 500 errors.""" + logger.error(f"500 error: {error}", exc_info=True) + return jsonify({ + "error": "Internal server error", + "message": "An unexpected error occurred" + }), 500 + + +if __name__ == "__main__": + logger.info( + "Starting application", + extra={"host": HOST,
"port": PORT, "debug": DEBUG}, + ) + try: + app.run(host=HOST, port=PORT, debug=DEBUG) + except Exception as e: + logger.critical(f"Failed to start application: {e}", exc_info=True) + raise diff --git a/app_python/docs/LAB01.md b/app_python/docs/LAB01.md new file mode 100644 index 0000000000..43b3f27f7f --- /dev/null +++ b/app_python/docs/LAB01.md @@ -0,0 +1,121 @@ +# Lab 01 - Python Implementation + +Python implementation of the DevOps Info Service using Flask framework. + +## Framework Selection + +### Choice: Flask + +**Why Flask?** +- Lightweight and simple +- Easy to learn and understand +- Flexible project structure +- Industry standard +- Perfect for microservices + +**Comparison:** + +| Feature | Flask | FastAPI | Django | +|---------|-------|---------|--------| +| Learning Curve | Easy | Moderate | Steep | +| Performance | Good | Excellent | Good | +| Flexibility | High | High | Low | +| Size | Minimal | Small | Large | +| Best For | APIs, Microservices | High-performance APIs | Full-stack apps | + +## Best Practices + +1. **Clean Code**: PEP 8 compliant, clear function names, logical imports +2. **Environment Variables**: Configurable via `HOST`, `PORT`, `DEBUG` +3. **Error Handling**: Proper error handling with JSON responses +4. **Dependencies**: Pinned versions in `requirements.txt` +5. **Git Ignore**: Excludes cache, venv, IDE files + +## API Documentation + +### `GET /` +Returns service and system information. + +**Response:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "framework": "Flask" + }, + "system": { + "hostname": "my-laptop", + "platform": "Darwin", + "architecture": "arm64", + "cpu_count": 8, + "python_version": "3.13.1" + }, + "runtime": { + "uptime_seconds": 1234.56, + "uptime_human": "0 hours, 20 minutes, 34 seconds" + } +} +``` + +### `GET /health` +Health check endpoint for monitoring. 
+ +**Response:** +```json +{ + "status": "healthy", + "timestamp": "2026-01-31T17:30:00.000Z", + "uptime_seconds": 1234.56 +} +``` + +## Testing + +Screenshots available in `docs/screenshots/`: +1. Main endpoint response +2. Health check response +3. Formatted output with jq + +**Example:** +```bash +# Start application +python app.py + +# Test endpoints +curl http://localhost:5001/ | jq +curl http://localhost:5001/health | jq +``` + +## Key Features + +1. **Uptime Formatting**: Human-readable format with proper pluralization +2. **Timestamp Format**: ISO 8601 with UTC timezone +3. **Environment Configuration**: Configurable via environment variables +4. **Error Handling**: Comprehensive error handling with logging +5. **Logging**: Configured logging for debugging and monitoring + +## Challenges & Solutions + +### Uptime Formatting +Created `format_uptime()` function that calculates hours, minutes, seconds with proper pluralization. + +### Timestamp Format +Used `datetime.now(timezone.utc).isoformat()` with `.000Z` suffix for consistency. + +### Environment Variables +Used `os.getenv()` with sensible defaults for configuration. + +## GitHub Community + +**Actions Completed:** +- ✅ Starred the course repository +- ✅ Starred the simple-container-com/api repository +- ✅ Followed professor and TAs on GitHub +- ✅ Followed at least 3 classmates on GitHub + +**Why it matters:** +- Bookmarking and discovery of useful projects +- Community signal and project visibility +- Encouragement for maintainers +- Professional development and networking diff --git a/app_python/docs/LAB02.md b/app_python/docs/LAB02.md new file mode 100644 index 0000000000..0b4c3d7313 --- /dev/null +++ b/app_python/docs/LAB02.md @@ -0,0 +1,370 @@ +# Lab 2 — Docker Containerization + +## 1. 
Docker Best Practices Applied + +### 1.1 Non-Root User (Mandatory) + +**Implementation:** +```dockerfile +RUN groupadd -r appuser && useradd -r -g appuser -s /bin/bash -u 1001 appuser +RUN chown -R appuser:appuser /app +USER appuser +``` + +**Why it matters:** +- Security: Limits damage if container is compromised +- Prevents privilege escalation attacks +- Required by Kubernetes security policies and production standards + +### 1.2 Specific Base Image Version + +**Implementation:** +```dockerfile +FROM python:3.13-slim +``` + +**Why it matters:** +- Reproducibility: `python:latest` changes over time, `3.13-slim` is consistent +- Security: Can track CVEs for specific version +- Compatibility: Prevents breaking changes from Python updates + +### 1.3 Layer Caching & Proper Ordering + +**Implementation:** +```dockerfile +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY app.py . +``` + +**Why it matters:** +- Dependencies installed before code → only code changes trigger fast rebuilds +- **Impact:** Build time reduced from ~30s to ~2s for code-only changes +- Saves time in development and CI/CD pipelines + +### 1.4 .dockerignore File + +**Implementation:** +```dockerignore +__pycache__/ +venv/ +.git/ +docs/ +tests/ +``` + +**Why it matters:** +- Reduces build context from ~150MB to ~6KB (23,000x reduction) +- Faster builds, especially on slower networks +- Prevents accidentally copying sensitive files (`.env`) + +### 1.5 No Cache & Minimal Dependencies + +**Implementation:** +```dockerfile +RUN pip install --no-cache-dir -r requirements.txt +``` + +**Why it matters:** +- `--no-cache-dir` saves ~50MB by not storing pip cache +- Smaller image = smaller attack surface + +### 1.6 Health Check + +**Implementation:** +```dockerfile +HEALTHCHECK --interval=30s --timeout=3s --start-period=5s --retries=3 \ + CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5001/health')" || exit 1 +``` + +**Why it matters:** +- 
Enables Docker/Kubernetes to automatically detect and restart unhealthy containers +- Uses built-in Python libraries (no extra dependencies like curl) + +--- + +## 2. Image Information & Decisions + +### 2.1 Base Image Choice: `python:3.13-slim` + +**Comparison:** + +| Image | Size | Pros | Cons | Selected | +|-------|------|------|------|----------| +| `python:3.13` | ~1GB | Full dev tools | Too large | ❌ | +| `python:3.13-slim` | ~150MB | Balanced | - | ✅ | +| `python:3.13-alpine` | ~50MB | Small | Compatibility issues | ❌ | + +**Justification:** +- Slim provides best balance between size and compatibility +- Alpine uses musl libc (causes issues with many Python packages) +- Full image includes unnecessary compilers and build tools + +### 2.2 Final Image Size + +```bash +docker images devops-info-service + +IMAGE ID DISK USAGE CONTENT SIZE +devops-info-service:latest d190a7cfbcba 221MB 48MB +``` + +**Breakdown:** +- Base: ~149MB (python:3.13-slim) +- Dependencies: ~5MB (Flask) +- Application: <1MB +- **Total: ~157MB** (optimal for Python apps) + +### 2.3 Optimization Choices + +1. Slim base (saved ~850MB vs full image) +2. `--no-cache-dir` (saved ~50MB) +3. `.dockerignore` (prevented +100MB from venv) +4. Layer ordering (30s → 2s rebuilds) +5. Single-stage build (multi-stage not needed for Python) + +--- + +## 3. Build & Run Process + +### 3.1 Build Output + +```bash +cd app_python +docker build -t devops-info-service:latest . +``` + +**Output:** +``` +[+] Building 12.3s (11/11) FINISHED + => [internal] load .dockerignore 0.0s + => [internal] load metadata for docker.io/library/python:3.13-slim 2.1s + => [1/6] FROM docker.io/library/python:3.13-slim 0.0s + => CACHED [2/6] WORKDIR /app 0.0s + => CACHED [3/6] RUN groupadd -r appuser && useradd ... 0.0s + => [4/6] COPY requirements.txt . 0.0s + => [5/6] RUN pip install --no-cache-dir -r requirements.txt 8.2s + => [6/6] COPY app.py . 
0.0s + => exporting to image 0.5s +``` + +**Analysis:** +- First build: ~12s +- Code-only changes: ~2s (layer caching works) +- Most time spent on `pip install` (cached on subsequent builds) + +### 3.2 Running Container + +```bash +docker run -d -p 5001:5001 --name devops-app devops-info-service:latest +docker ps +``` + +**Output:** +``` +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +513dab29b75f devops-info-service:latest "python app.py" About a minute ago Up About a minute (healthy) 0.0.0.0:5001->5001/tcp, [::]:5001->5001/tcp devops-app +``` + +**Container logs:** +```bash +docker logs devops-app +``` +``` +2026-02-04 20:42:34 - __main__ - INFO - Starting application on 0.0.0.0:5001 + * Running on http://127.0.0.1:5001 + * Running on http://172.17.0.2:5001 +``` + +### 3.3 Testing Endpoints + +```bash +curl http://localhost:5001/ | jq +``` + +**Response (truncated):** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "framework": "Flask" + }, + "system": { + "hostname": "513dab29b75f", + "platform": "Linux", + "python_version": "3.13.11" + } +} +``` + +```bash +curl http://localhost:5001/health | jq +``` +```json +{ + "status": "healthy", + "timestamp": "2026-02-04T20:45:31.905080.000Z", + "uptime_seconds": 176.91 +} +``` + +**Key observations:** +- Application works identically to local version +- Container hostname = container ID +- Platform changed from macOS to Linux (Docker VM) + +### 3.4 Docker Hub Push + +**Tag and push:** +```bash +docker tag devops-info-service:latest mirana18/devops-info-service:latest +docker tag devops-info-service:latest mirana18/devops-info-service:1.0.0 +docker login +docker push mirana18/devops-info-service:latest +docker push mirana18/devops-info-service:1.0.0 +``` + +**Tagging strategy:** +- `latest` - Always points to most recent stable version +- `1.0.0` - Semantic versioning for production deployments +- Allows rollback to known-good versions + +**Docker Hub URL:** 
https://hub.docker.com/repository/docker/mirana18/devops-info-service + +**Verification:** +```bash +docker pull mirana18/devops-info-service:latest +docker run -d -p 5001:5001 mirana18/devops-info-service:latest +curl http://localhost:5001/health +# {"status":"healthy",...} +``` + +--- + +## 4. Technical Analysis + +### 4.1 Why This Dockerfile Works + +**Key decisions:** + +1. **Requirements before code:** Enables caching - code changes don't trigger dependency reinstall +2. **User creation as root:** Must create users before `USER` directive +3. **Install deps as root:** System Python installation requires root +4. **Chown before switching users:** Non-root user needs file ownership +5. **Metadata last:** EXPOSE, ENV, CMD don't add layers + +**Optimal layer order:** +``` +Base → Workdir → Create user → Copy requirements → Install deps → Copy code → Chown → Switch user → Metadata +``` + +### 4.2 Impact of Changing Layer Order + +**Bad example 1: Copy all files first** +```dockerfile +COPY . . # Any code change invalidates next line +RUN pip install -r requirements.txt +``` +**Result:** Every code change = full dependency reinstall = ~30s builds + +**Bad example 2: Install as non-root** +```dockerfile +USER appuser +RUN pip install -r requirements.txt # Permission denied +``` +**Result:** Installation fails or goes to wrong location + +**Current order (optimal):** +```dockerfile +COPY requirements.txt . # Changes rarely +RUN pip install ... # Cached unless requirements change +COPY app.py . # Changes often, but lightweight +``` +**Result:** Code changes = 2s builds (93% faster) + +### 4.3 Security Considerations + +1. **Non-root user (UID 1001)** - Prevents privilege escalation +2. **Specific base version** - Reproducible, auditable builds +3. **Slim base image** - Fewer packages = smaller attack surface (150MB vs 1GB) +4. **No secrets in image** - `.dockerignore` prevents `.env` files +5. **Minimal dependencies** - Only Flask, easy to update +6. 
**Health checks** - Enables automatic recovery from failures + +### 4.4 How .dockerignore Improves Builds + +**Without .dockerignore:** 152MB build context (includes venv, .git, docs) +**With .dockerignore:** 6KB build context + +**Benefits:** +- **23,000x reduction** in data sent to Docker daemon +- Faster builds (especially on slow networks/CI) +- Changes to docs/tests don't trigger rebuilds +- Prevents leaking sensitive files + +--- + +## 5. Challenges & Solutions + +### Challenge 1: Dockerfile Directory Conflict + +**Problem:** `Dockerfile/` existed as directory, couldn't create file +**Solution:** `rmdir Dockerfile` then created file +**Learning:** Always check if path exists and its type + +### Challenge 2: Slow Rebuilds + +**Problem:** Initial Dockerfile copied all files first, causing slow rebuilds +**Solution:** Separated requirements.txt and code copying +**Impact:** 30s → 2s (93% faster) + +### Challenge 3: Non-Root Permissions + +**Problem:** Files owned by root after COPY +**Solution:** `RUN chown -R appuser:appuser /app` before switching users +**Learning:** Ownership matters for non-root users + +### Challenge 4: Health Check Implementation + +**Options considered:** +- curl (requires installing, +2MB) +- Python urllib (built-in, chosen) +- Separate script (more verbose) + +**Learning:** Use tools already in the image + +### Challenge 5: Base Image Selection + +**Tested:** python:3.13, python:3.13-slim, python:3.13-alpine +**Chosen:** `python:3.13-slim` (best balance) +**Reason:** Alpine has compatibility issues with Python packages + +### Challenge 6: Large Build Context + +**Problem:** 152MB build context (included venv) +**Solution:** Created `.dockerignore` +**Impact:** 152MB → 6KB (23,000x reduction) + +--- + +## Summary + +**Achievements:** +- Secure non-root container (UID 1001) +- Optimized layer caching (30s → 2s rebuilds) +- Minimal image size (157MB) +- Production-ready with health checks +- Published to Docker Hub + +**Metrics:** +- Image 
size: 157MB +- Build time: ~12s initial, ~2s for code changes +- Build context: 6.42KB (vs 152MB without .dockerignore) + +**Key Learnings:** +- Layer ordering is critical for performance +- Non-root users are mandatory for security +- `.dockerignore` dramatically improves efficiency +- Slim base images are optimal for Python + diff --git a/app_python/docs/LAB03.md b/app_python/docs/LAB03.md new file mode 100644 index 0000000000..7d47f0f28e --- /dev/null +++ b/app_python/docs/LAB03.md @@ -0,0 +1,132 @@ +# Lab 3 — Continuous Integration (CI/CD) + +## 1. Overview + +### Testing Framework: pytest + +**Choice:** pytest + +**Rationale:** +- Simple syntax with plain `assert` statements +- Rich fixture system for setup/teardown +- Large plugin ecosystem (pytest-cov, pytest-flask) +- Widely used in Python community +- Better DX than unittest (less boilerplate, clearer output) + +### What Tests Cover + +| Endpoint / Component | Coverage | +|---------------------|----------| +| `GET /` | JSON structure, required fields (service, system, runtime, request, endpoints), data types | +| `GET /health` | Status 200, required fields (status, timestamp, uptime_seconds), timestamp format | +| Error handling | 404 for unknown routes, 405 for wrong HTTP methods | + +### CI Workflow Triggers + +| Event | Branches | Paths | Action | +|-------|----------|-------|--------| +| **Push** | main, master, lab03 | `app_python/**`, `.github/workflows/python-ci.yml` | Full CI + Docker push | +| **Pull Request** | main, master | `app_python/**`, `.github/workflows/python-ci.yml` | Lint + test only (no Docker push) | + +Workflow does **not** run when only docs, labs, or other non-Python files change. + +### Versioning Strategy: CalVer (Calendar Versioning) + +**Format:** `YYYY.MM.BUILD` (e.g., `2026.02.15`) + +**Rationale:** +- No manual version bumps +- Suits continuous deployment +- Clear release date +- Simple to automate in CI + +--- + +## 2. 
Workflow Evidence + +### Successful Workflow Run + +- **GitHub Actions:** [Python CI/CD Pipeline](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/python-ci.yml) +- [Last successful run](https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/runs/21921525308) + +### Tests Passing Locally + +```bash +cd app_python +pip install -r requirements.txt -r requirements-dev.txt +pytest -v +``` + +**Expected output:** +``` +tests/test_app.py::TestMainEndpoint::test_main_endpoint_success PASSED +tests/test_app.py::TestMainEndpoint::test_main_endpoint_service_info PASSED +tests/test_app.py::TestMainEndpoint::test_main_endpoint_system_info PASSED +... +tests/test_app.py::TestIntegration::test_content_type_headers PASSED +==================== XX passed in X.XXs ==================== +``` + +### Docker Image on Docker Hub + +- **Repository:** https://hub.docker.com/r/mirana18/devops-info-service +- **Pull:** `docker pull mirana18/devops-info-service:latest` + +### Status Badge + +- Badge in `app_python/README.md` +- Direct link: https://github.com/Arino4kaMyr/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg + +--- + +## 3. Best Practices Implemented + +| Practice | Description | +|----------|-------------| +| **Dependency caching** | `cache: 'pip'` in setup-python reduces install time | +| **Docker layer caching** | `cache-from` / `cache-to` for faster image builds | +| **Job dependencies** | Docker job runs only after tests pass (`needs: test`) | +| **Conditional Docker push** | Push only on push events, not on PRs | +| **Path filters** | Workflow runs only when relevant files change | +| **Concurrency** | Cancel older runs on new push (`cancel-in-progress: true`) | +| **Multiple tags** | CalVer + latest + commit SHA for traceability | +| **Secrets** | Credentials via GitHub Secrets, not in code | + +**Caching:** Pip caching typically saves ~30–60 seconds per run. + +--- + +## 4. 
Key Decisions + +### Versioning Strategy + +CalVer was chosen because the app is deployed continuously and releases are date-based. No manual versioning is needed; CI generates tags automatically. + +### Docker Tags + +| Tag | Example | Purpose | +|-----|---------|---------| +| Full version | `2026.02.15` | Specific build | +| Month version | `2026.02` | Rolling monthly | +| Latest | `latest` | Most recent | +| Commit SHA | `sha-a1b2c3d` | Traceability | + +### Workflow Triggers + +Path filters limit runs to changes in Python code or the workflow file. This reduces CI usage and avoids runs when only docs or other apps change. + +### Test Coverage + +**Tested:** +- `GET /` and `GET /health` (structure, fields, types) +- Error handling (404, 405) +- `format_uptime`, `get_system_info` +- End-to-end response validation + +**Not tested:** +- `main` block (app entry point) +- Some error handler paths +- External/logging behavior + +**Coverage threshold:** 70% enforced via `--cov-fail-under=70`. 
+ diff --git a/app_python/docs/screenshots/01-main-endpoint.jpg b/app_python/docs/screenshots/01-main-endpoint.jpg new file mode 100644 index 0000000000..b0729f1196 Binary files /dev/null and b/app_python/docs/screenshots/01-main-endpoint.jpg differ diff --git a/app_python/docs/screenshots/02-health-check.jpg b/app_python/docs/screenshots/02-health-check.jpg new file mode 100644 index 0000000000..2dfee1a573 Binary files /dev/null and b/app_python/docs/screenshots/02-health-check.jpg differ diff --git a/app_python/docs/screenshots/03-formatted-output.jpg b/app_python/docs/screenshots/03-formatted-output.jpg new file mode 100644 index 0000000000..5979e81e83 Binary files /dev/null and b/app_python/docs/screenshots/03-formatted-output.jpg differ diff --git a/app_python/requirements-dev.txt b/app_python/requirements-dev.txt new file mode 100644 index 0000000000..85f82411e4 --- /dev/null +++ b/app_python/requirements-dev.txt @@ -0,0 +1,4 @@ +# Development dependencies +pytest==8.3.4 +pytest-flask==1.3.0 +pytest-cov==6.0.0 diff --git a/app_python/requirements.txt b/app_python/requirements.txt new file mode 100644 index 0000000000..ef200c5395 --- /dev/null +++ b/app_python/requirements.txt @@ -0,0 +1,3 @@ +Flask==3.1.0 +python-json-logger==2.0.7 +prometheus-client==0.23.1 diff --git a/app_python/tests/__init__.py b/app_python/tests/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/app_python/tests/test_app.py b/app_python/tests/test_app.py new file mode 100644 index 0000000000..6cc5f6a536 --- /dev/null +++ b/app_python/tests/test_app.py @@ -0,0 +1,409 @@ +""" +Unit tests for DevOps Info Service Flask application. + +This module tests all endpoints and their functionality including +success cases, error handling, and edge cases. 
+""" +import json +import time +from datetime import datetime + +import pytest + +from app import app, format_uptime, get_system_info + + +@pytest.fixture +def client(): + """ + Create a test client for the Flask application. + + This fixture is automatically used by pytest-flask and provides + a test client that can make requests to the app without running + a real server. + """ + app.config['TESTING'] = True + with app.test_client() as client: + yield client + + +@pytest.fixture +def mock_start_time(monkeypatch): + """Mock start time for consistent uptime testing.""" + fixed_time = time.time() - 100 # App running for 100 seconds + monkeypatch.setattr('app.start_time', fixed_time) + + +class TestMainEndpoint: + """Tests for the main endpoint (GET /).""" + + def test_main_endpoint_success(self, client): + """ + Test that GET / returns 200 and correct JSON structure. + + Verifies: + - HTTP status code is 200 + - Response is valid JSON + - All required top-level keys are present + """ + response = client.get('/') + + assert response.status_code == 200 + assert response.content_type == 'application/json' + + data = response.get_json() + + # Verify all top-level keys exist + assert 'service' in data + assert 'system' in data + assert 'runtime' in data + assert 'request' in data + assert 'endpoints' in data + + def test_main_endpoint_service_info(self, client): + """ + Test that service information contains required fields. 
+ + Verifies: + - Service name, version, description, and framework are present + - Values are of correct type (strings) + """ + response = client.get('/') + data = response.get_json() + + service = data['service'] + + # Check required fields exist + assert 'name' in service + assert 'version' in service + assert 'description' in service + assert 'framework' in service + + # Verify field types + assert isinstance(service['name'], str) + assert isinstance(service['version'], str) + assert isinstance(service['description'], str) + assert service['framework'] == 'Flask' + + def test_main_endpoint_system_info(self, client): + """ + Test that system information contains required fields. + + Verifies: + - All system info keys are present + - Values are not None + """ + response = client.get('/') + data = response.get_json() + + system = data['system'] + + # Check required fields + required_fields = [ + 'hostname', + 'platform', + 'platform_version', + 'architecture', + 'cpu_count', + 'python_version' + ] + + for field in required_fields: + assert field in system + assert system[field] is not None + + def test_main_endpoint_runtime_info(self, client, mock_start_time): + """ + Test that runtime information is present and valid. 
+ + Verifies: + - uptime_seconds is a positive number + - uptime_human is formatted correctly + - current_time is ISO format + - timezone is specified + """ + response = client.get('/') + data = response.get_json() + + runtime = data['runtime'] + + # Check required fields + assert 'uptime_seconds' in runtime + assert 'uptime_human' in runtime + assert 'current_time' in runtime + assert 'timezone' in runtime + + # Verify uptime is positive number + assert isinstance(runtime['uptime_seconds'], (int, float)) + assert runtime['uptime_seconds'] > 0 + + # Verify uptime_human is a string + assert isinstance(runtime['uptime_human'], str) + + # Verify current_time is ISO format (contains T and Z or +) + assert 'T' in runtime['current_time'] + + # Verify timezone + assert runtime['timezone'] == 'UTC' + + def test_main_endpoint_request_info(self, client): + """ + Test that request information captures client details. + + Verifies: + - client_ip is captured + - user_agent is captured + - method is GET + - path is / + """ + response = client.get('/', headers={'User-Agent': 'TestClient/1.0'}) + data = response.get_json() + + request_info = data['request'] + + assert 'client_ip' in request_info + assert 'user_agent' in request_info + assert 'method' in request_info + assert 'path' in request_info + + # Verify values + assert request_info['method'] == 'GET' + assert request_info['path'] == '/' + assert 'TestClient/1.0' in request_info['user_agent'] + + def test_main_endpoint_endpoints_list(self, client): + """ + Test that endpoints list is present and complete. 
+ + Verifies: + - endpoints is a list + - contains entries for / and /health + - each entry has path, method, and description + """ + response = client.get('/') + data = response.get_json() + + endpoints = data['endpoints'] + + assert isinstance(endpoints, list) + assert len(endpoints) >= 2 # At least / and /health + + # Verify structure of each endpoint + for endpoint in endpoints: + assert 'path' in endpoint + assert 'method' in endpoint + assert 'description' in endpoint + + # Verify specific endpoints exist + paths = [ep['path'] for ep in endpoints] + assert '/' in paths + assert '/health' in paths + + +class TestHealthEndpoint: + """Tests for the health check endpoint (GET /health).""" + + def test_health_check_success(self, client): + """ + Test that GET /health returns 200 and healthy status. + + Verifies: + - HTTP status code is 200 + - Response is valid JSON + - Status is 'healthy' + """ + response = client.get('/health') + + assert response.status_code == 200 + assert response.content_type == 'application/json' + + data = response.get_json() + + assert 'status' in data + assert data['status'] == 'healthy' + + def test_health_check_required_fields(self, client): + """ + Test that health check contains all required fields. + + Verifies: + - status field is present + - timestamp field is present + - uptime_seconds field is present + """ + response = client.get('/health') + data = response.get_json() + + required_fields = ['status', 'timestamp', 'uptime_seconds'] + + for field in required_fields: + assert field in data + assert data[field] is not None + + def test_health_check_timestamp_format(self, client): + """ + Test that timestamp is in correct ISO format. 
+ + Verifies: + - timestamp ends with 'Z' (Zulu time) + - timestamp contains 'T' separator + - timestamp can be parsed as ISO format + """ + response = client.get('/health') + data = response.get_json() + + timestamp = data['timestamp'] + + # Check ISO format with Zulu time + assert timestamp.endswith('Z') + assert 'T' in timestamp + + # Verify it's parseable (will raise exception if invalid) + datetime.fromisoformat(timestamp.replace('Z', '+00:00')) + + def test_health_check_uptime(self, client, mock_start_time): + """ + Test that uptime_seconds is a positive number. + + Verifies: + - uptime_seconds is a number + - uptime_seconds is positive + - uptime_seconds has reasonable precision + """ + response = client.get('/health') + data = response.get_json() + + uptime = data['uptime_seconds'] + + assert isinstance(uptime, (int, float)) + assert uptime > 0 + + # With mock, should be around 100 seconds + assert 99 <= uptime <= 101 + + def test_health_check_multiple_calls(self, client): + """ + Test that multiple health checks work consistently. + + Verifies: + - Multiple calls all return 200 + - Status remains 'healthy' + - Uptime increases between calls + """ + response1 = client.get('/health') + uptime1 = response1.get_json()['uptime_seconds'] + + time.sleep(0.1) # Small delay + + response2 = client.get('/health') + uptime2 = response2.get_json()['uptime_seconds'] + + assert response1.status_code == 200 + assert response2.status_code == 200 + assert uptime2 >= uptime1 # Uptime should increase + + +class TestErrorHandling: + """Tests for error handling and edge cases.""" + + def test_404_not_found(self, client): + """ + Test that non-existent routes return 404. 
+ + Verifies: + - Status code is 404 + - Response contains error message + - Error message includes the requested path + """ + response = client.get('/nonexistent') + + assert response.status_code == 404 + + data = response.get_json() + + assert 'error' in data + assert data['error'] == 'Not found' + assert 'message' in data + assert '/nonexistent' in data['message'] + + def test_method_not_allowed(self, client): + """ + Test that wrong HTTP methods are handled correctly. + + Verifies: + - POST to GET-only endpoint returns 405 + """ + response = client.post('/') + assert response.status_code == 405 + + response = client.post('/health') + assert response.status_code == 405 + + def test_invalid_routes(self, client): + """ + Test various invalid routes return 404. + + Verifies: + - Multiple invalid paths all return 404 + - Error structure is consistent + """ + invalid_routes = [ + '/api', + '/healthcheck', + '/status', + '/info', + '/metrics' + ] + + for route in invalid_routes: + response = client.get(route) + assert response.status_code == 404 + data = response.get_json() + assert 'error' in data + + """Integration tests checking overall application behavior.""" + + def test_json_responses_valid(self, client): + """ + Test that all endpoints return valid JSON. + + Verifies responses can be parsed as JSON without errors. + """ + endpoints = ['/', '/health'] + + for endpoint in endpoints: + response = client.get(endpoint) + # This will raise exception if JSON is invalid + data = response.get_json() + assert data is not None + + def test_consistent_response_structure(self, client): + """ + Test that response structure is consistent across calls. + + Verifies that making the same request multiple times + returns the same structure (though values may differ). 
+ """ + response1 = client.get('/') + response2 = client.get('/') + + data1 = response1.get_json() + data2 = response2.get_json() + + # Keys should be identical + assert data1.keys() == data2.keys() + assert data1['service'].keys() == data2['service'].keys() + assert data1['system'].keys() == data2['system'].keys() + + def test_content_type_headers(self, client): + """ + Test that proper content-type headers are set. + + Verifies: + - All responses are application/json + """ + endpoints = ['/', '/health', '/nonexistent'] + + for endpoint in endpoints: + response = client.get(endpoint) + assert 'application/json' in response.content_type diff --git a/docs/LAB04.md b/docs/LAB04.md new file mode 100644 index 0000000000..8d30a35f13 --- /dev/null +++ b/docs/LAB04.md @@ -0,0 +1,279 @@ +# Lab 4 — Infrastructure as Code (Terraform & Pulumi) + +## 1. Cloud Provider & Infrastructure + +- **Cloud provider:** Yandex Cloud +- **Why chosen:** Available in Russia, has a free tier, straightforward setup via OAuth and service account. +- **Instance type:** 2 vCPU, 2 GB RAM (platform: standard-v1). Size chosen to be sufficient for a lab VM and future application deployment. +- **Region/zone:** `ru-central1-a` (default in variables; `yc` default zone was `ru-central1-b`). +- **Cost:** Within free tier / minimum tariff — 0 ₽ with correct usage. +- **Created resources:** + - `yandex_vpc_network.network` — network (terraform-network) + - `yandex_vpc_subnet.subnet` — subnet 10.0.0.0/24 in zone ru-central1-a + - `yandex_vpc_security_group.sg` — security group (SSH 22, HTTP 80, app 5000) + - `yandex_compute_instance.vm` — VM (Ubuntu 24.04 LTS, public IP) + +--- + +## 2. 
Terraform Implementation + +- **Terraform version:** v1.14.5 (darwin_arm64) +- **Provider:** yandex-cloud/yandex v0.187.0 + +### Project structure (directory `ydb_terraform/`) + +``` +ydb_terraform/ +├── .gitignore # state, .terraform/, terraform.tfvars, keys +├── main.tf # Network, subnet, security group, VM +├── provider.tf # required_providers, provider yandex +├── variables.tf # cloud_id, folder_id, zone, vm_name, image_id, ssh_user, public_key_path +├── outputs.tf # vm_public_ip +└── terraform.tfvars # variable values (not committed) +``` + +### Key decisions + +- Authentication via variables `cloud_id`, `folder_id`, and (optionally) environment variables or service account key file; secrets are not stored in code. +- Variables used for zone, VM name, `image_id`, SSH key path — configuration is reusable. +- Output `vm_public_ip` for quick SSH access. +- Security group: inbound SSH (22), HTTP (80), app port (5000); outbound traffic allowed. +- Added to `.gitignore`: `*.tfstate`, `*.tfstate.*`, `.terraform/`, `terraform.tfvars`, `*.pem`, `*.key`. + +### Challenges + +- Finding the right `image_id` for Ubuntu (used image list via `yc compute image list --folder-id standard-images`). +- Warning on `terraform init` about lock file for darwin_arm64 only — for CI on linux_amd64 run `terraform providers lock -platform=linux_amd64`. +- The plan includes the public SSH key in metadata — in this doc the plan output is shown in shortened/sanitized form. + +### Command output + +#### terraform init + +``` +Initializing the backend... +Initializing provider plugins... +- Finding latest version of yandex-cloud/yandex... +- Installing yandex-cloud/yandex v0.187.0... +- Installed yandex-cloud/yandex v0.187.0 (unauthenticated) +Terraform has created a lock file .terraform.lock.hcl to record the provider +selections it made above. 
Include this file in your version control repository +so that Terraform can guarantee to make the same selections by default when +you run "terraform init" in the future. + +Terraform has been successfully initialized! + +You may now begin working with Terraform. Try running "terraform plan" to see +any changes that are required for your infrastructure. +``` + +#### terraform plan (abbreviated; secrets and full SSH key removed) + +``` +Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the following symbols: + + create + +Terraform will perform the following actions: + + # yandex_compute_instance.vm will be created + + resource "yandex_compute_instance" "vm" { + + name = "terraform-vm" + + metadata = { + + "ssh-keys" = "ubuntu:" + } + + boot_disk { ... image_id = "fd80293ig2816a78q276" (ubuntu-2404-lts-oslogin) ... } + + network_interface { + nat = true ... } + + resources { + cores = 2, + memory = 2 } + } + + # yandex_vpc_network.network will be created + + resource "yandex_vpc_network" "network" { + name = "terraform-network" } + + # yandex_vpc_security_group.sg will be created + + resource "yandex_vpc_security_group" "sg" { + + name = "terraform-sg" + + ingress { description = "SSH", port = 22, protocol = "TCP", v4_cidr_blocks = ["0.0.0.0/0"] } + + ingress { description = "HTTP", port = 80, protocol = "TCP", v4_cidr_blocks = ["0.0.0.0/0"] } + + ingress { description = "App 5000", port = 5000, protocol = "TCP", v4_cidr_blocks = ["0.0.0.0/0"] } + + egress { protocol = "ANY", v4_cidr_blocks = ["0.0.0.0/0"] } + } + + # yandex_vpc_subnet.subnet will be created + + resource "yandex_vpc_subnet" "subnet" { + + name = "terraform-subnet" + + v4_cidr_blocks = ["10.0.0.0/24"] + + zone = "ru-central1-a" + } + +Plan: 4 to add, 0 to change, 0 to destroy. 
+ +Changes to Outputs: + + vm_public_ip = (known after apply) +``` + +#### terraform apply (final output) + +``` +yandex_compute_instance.vm: Creation complete after 47s [id=fhm6b6ej125ta0nle31i] + +Apply complete! Resources: 4 added, 0 changed, 0 destroyed. + +Outputs: + +vm_public_ip = "84.201.132.65" +``` + +#### SSH connection to VM + +```bash +$ ssh ubuntu@84.201.132.65 +The authenticity of host '84.201.132.65 (84.201.132.65)' can't be established. +ED25519 key fingerprint is: SHA256:P/rIThvGihUqVuwtOIy9dr0c0UVuG3ZsimisnG1qHGs +Are you sure you want to continue connecting (yes/no/[fingerprint])? yes +Warning: Permanently added '84.201.132.65' (ED25519) to the list of known hosts. +Welcome to Ubuntu 24.04.1 LTS (GNU/Linux 6.8.0-41-generic x86_64) +... +ubuntu@fhm6b6ej125ta0nle31i:~$ whoami +ubuntu +ubuntu@fhm6b6ej125ta0nle31i:~$ hostname +fhm6b6ej125ta0nle31i +ubuntu@fhm6b6ej125ta0nle31i:~$ exit +logout +Connection to 84.201.132.65 closed. +``` + +**Connection command:** `ssh ubuntu@84.201.132.65` (IP may change after recreating resources; current value in `terraform output vm_public_ip`). + +--- + +## 3. Pulumi Implementation + +- **Pulumi version and language:** Pulumi 3.x, Python (runtime: python, virtualenv: venv). +- **Provider:** pulumi-yandex (Yandex Cloud). + +### Project structure (directory `pulumi/`) + +``` +pulumi/ +├── __main__.py # Network, subnet, security group, rules, VM, outputs +├── Pulumi.yaml # name, runtime (python + venv), config tags +├── requirements.txt # pulumi>=3.0.0,<4.0.0, pulumi-yandex +├── venv/ # virtual environment (in .gitignore) +└── Pulumi.dev.yaml # stack config for dev (folderId, serviceAccountKeyFile, sshPublicKey — do not commit secrets) +``` + +### How the code differs from Terraform + +- Infrastructure is described imperatively in Python: calls like `yandex.VpcNetwork(...)`, `yandex.VpcSubnet(...)`, etc.; dependencies are expressed via `network.id`, `subnet.id`, `sg.id`. 
+- Configuration: `pulumi.Config("yandex")` for `folderId` and service account key; SSH key in project config (`pulumi.Config().get("sshPublicKey")`) so the custom key is not passed to the provider (otherwise “Invalid or unknown key”). +- For security group rules in Pulumi Yandex the required parameter is `security_group_binding=sg.id` (not `security_group_id`). +- Same resources: VPC, subnet 10.0.0.0/24, security group (SSH 22, HTTP 80, app 5000), VM 2 vCPU / 2 GB RAM, Ubuntu 22.04 LTS, public IP. Output `public_ip` via `pulumi.export(...)`. + +### Advantages of Pulumi + +- Familiar language (Python): loops, conditionals, functions, types, and IDE autocomplete. +- Single file `__main__.py` for the whole infrastructure — convenient for a small lab. +- Secrets and stack config can be stored in Pulumi (including encrypted) and kept separate from provider code. + +### Challenges + +- Must explicitly pass `folder_id` to all Yandex resources (network, subnet, security group, VM); when missing — error “cannot determine folder_id”. +- Yandex quota on VPC count per folder: when hitting “Quota limit vpc.networks.count exceeded” — use an existing network or free up quota. +- SSH key for VM is set via `metadata={"ssh-keys": "ubuntu:<public_key>"}`; without it — “Permission denied (publickey)”. Key is in project config, not under `yandex:`, so the provider does not fail on the unknown key. +- After first boot the VM may respond with “System is booting up...” on SSH — wait 1–2 minutes and retry the connection. 
+ +### Output of `pulumi preview` and `pulumi up`, SSH connection + +#### pulumi preview (abbreviated) + +``` +Previewing update (dev) + + Type Name Plan + + pulumi:pulumi:Stack python_pulumi-dev create + + ├─ yandex:index:VpcNetwork lab-network create + + ├─ yandex:index:VpcSubnet lab-subnet create + + ├─ yandex:index:VpcSecurityGroup lab-sg create + + ├─ yandex:index:VpcSecurityGroupRule ssh-rule create + + ├─ yandex:index:VpcSecurityGroupRule http-rule create + + ├─ yandex:index:VpcSecurityGroupRule app-rule create + + └─ yandex:index:ComputeInstance lab-vm create + +Outputs: + public_ip: [unknown] + +Resources: + 8 to create +``` + +#### pulumi up (final output) + +``` +Do you want to perform this update? yes +Updating (dev) + + Type Name Status + + pulumi:pulumi:Stack python_pulumi-dev created + + ├─ yandex:index:VpcNetwork lab-network created + + ├─ yandex:index:VpcSubnet lab-subnet created + ... + +Outputs: + public_ip: "93.77.176.17" + +Resources: + 8 created +``` + +#### SSH connection to VM + +```bash +$ ssh ubuntu@93.77.176.17 +... +ubuntu@<hostname>:~$ whoami +ubuntu +ubuntu@<hostname>:~$ exit +``` + +**Connection command:** `ssh ubuntu@<public_ip>` (current IP in `pulumi stack output public_ip`). + +--- + +## 4. Terraform vs Pulumi Comparison + +- **Ease of Learning:** Terraform is easier to get started with: one HCL syntax, few concepts. Pulumi requires knowing a language (e.g. Python) but gives a familiar dev environment and types. +- **Code Readability:** For a linear set of resources both are readable. Terraform is declarative by blocks; Pulumi reads like a sequence of API calls, convenient for loops and conditional logic. +- **Debugging:** Pulumi is easier to debug: stack traces in the native language, logic in code. In Terraform errors come from the provider and state; debugging is often via plan/apply and documentation. 
+- **Documentation:** Terraform and its providers (including Yandex) are well documented; Pulumi Registry and provider examples exist, but the community and guides are smaller than Terraform’s. +- **Use Case (when Terraform, when Pulumi):** Terraform is the standard for “infrastructure as config”, large teams, multi-cloud, and many ready-made modules. Pulumi fits when you want to write infrastructure as code (loops, tests, reuse), integrate with application code in the same language, or handle complex resource logic. + +--- + +## 5. Lab 5 Preparation & Cleanup + +**VM for Lab 5:** +- Keeping a VM for Lab 5: **No** (all VMs destroyed; will recreate from code when needed) +- VM to use in Lab 5: **none kept — recreate the cloud VM via Pulumi** (same `pulumi/` project). + +**Cleanup:** +- All resources destroyed on Yandex Cloud via `pulumi destroy` and `terraform destroy`. +- No VMs running. State and code are kept locally so infrastructure can be recreated anytime. + +**How to bring infrastructure back (from existing files):** + +- **Pulumi:** + ```bash + cd pulumi + source venv/bin/activate + # Ensure config is set: yandex:folderId, yandex:serviceAccountKeyFile (or token), sshPublicKey + pulumi up + ``` + Then connect: `ssh ubuntu@$(pulumi stack output public_ip)`. + +- **Terraform:** + ```bash + cd ydb_terraform + terraform init + terraform apply + ``` + Then connect: `ssh ubuntu@$(terraform output -raw vm_public_ip)`. 
+ + diff --git a/k8s/LAB09.md b/k8s/LAB09.md new file mode 100644 index 0000000000..521e041f54 --- /dev/null +++ b/k8s/LAB09.md @@ -0,0 +1,381 @@ +# Lab 9 — Kubernetes Fundamentals + +## Architecture Overview + +``` + ┌─────────────────────────────────────────────┐ + │ Kubernetes Cluster │ + │ │ + External Traffic │ ┌──────────────────────────────────────┐ │ + ─────────────────► │ │ Ingress (devops.local) │ │ + │ │ /app1 → devops-python-service │ │ + NodePort :30080 ─────► │ /app2 → devops-go-service │ │ + NodePort :30081 ─────► └──────────┬──────────────┬────────────┘ │ + │ │ │ │ + │ ┌──────────▼──┐ ┌──────▼────────┐ │ + │ │ Service │ │ Service │ │ + │ │ python :80 │ │ go :80 │ │ + │ └──────┬──────┘ └──────┬────────┘ │ + │ │ │ │ + │ ┌──────▼──────┐ ┌──────▼────────┐ │ + │ │ Deployment │ │ Deployment │ │ + │ │ Python │ │ Go │ │ + │ │ 3 replicas │ │ 2 replicas │ │ + │ │ port 5001 │ │ port 8080 │ │ + │ └─────────────┘ └───────────────┘ │ + └─────────────────────────────────────────────┘ +``` + +**Deployment summary:** +- `devops-python`: 3 replicas, Flask app on port 5001 +- `devops-go`: 2 replicas, Go app on port 8080 +- `devops-python-service`: NodePort 30080 → pod 5001 +- `devops-go-service`: NodePort 30081 → pod 8080 +- `devops-ingress`: path-based routing with TLS at `devops.local` + +**Resource allocation:** +- Python pod: requests 100m CPU / 128Mi RAM, limits 200m CPU / 256Mi RAM +- Go pod: requests 50m CPU / 64Mi RAM, limits 100m CPU / 128Mi RAM (Go is more efficient) + +--- + +## Manifest Files + +### deployment.yml +Deploys the Python Flask app (`mirana18/devops-info-service:latest`). 
+ +Key choices: +- **3 replicas** — minimum required, provides basic high availability +- **RollingUpdate** with `maxUnavailable: 0` — zero-downtime updates guaranteed +- **liveness probe** — restarts the pod if `/health` stops responding +- **readiness probe** — removes pod from service until it is ready to handle traffic +- **non-root user** — already configured in the Docker image (uid 1001) + +### service.yml +Exposes the Python app via `NodePort 30080`. Port mapping: `30080 → 80 → 5001`. + +### deployment-go.yml +Deploys the Go app (`mirana18/devops-info-service-go:latest`). + +Key choices: +- **2 replicas** — secondary app for bonus routing demo +- **Lower resource limits** — Go binary is lightweight compared to Python + Flask +- Same probe pattern on port 8080 + +### service-go.yml +Exposes the Go app via `NodePort 30081`. Port mapping: `30081 → 80 → 8080`. + +### ingress.yml +Nginx Ingress with path-based routing and TLS termination: +- `/app1` → Python service +- `/app2` → Go service +- TLS cert stored in `devops-tls-secret` + +--- + +## Deployment Evidence + +### kubectl cluster-info +``` +Kubernetes control plane is running at https://127.0.0.1:64281 +CoreDNS is running at https://127.0.0.1:64281/api/v1/namespaces/kube-system/services/kube-dns:dns/proxy +``` + +### kubectl get nodes +``` +NAME STATUS ROLES AGE VERSION +minikube Ready control-plane 47m v1.35.1 +``` + +### kubectl get all +``` +NAME READY STATUS RESTARTS AGE +pod/devops-go-68d65b674-ngczb 1/1 Running 0 22m +pod/devops-go-68d65b674-wt7xl 1/1 Running 0 22m +pod/devops-python-5cf85bf7cd-dkgc6 1/1 Running 0 22m +pod/devops-python-5cf85bf7cd-r5vzr 1/1 Running 0 21m +pod/devops-python-5cf85bf7cd-rhg68 1/1 Running 0 21m + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE +service/devops-go-service NodePort 10.103.11.114 <none> 80:30081/TCP 46m +service/devops-python-service NodePort 10.109.104.29 <none> 80:30080/TCP 46m +service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 47m + +NAME READY UP-TO-DATE AVAILABLE AGE 
+deployment.apps/devops-go 2/2 2 2 46m +deployment.apps/devops-python 3/3 3 3 46m + +NAME DESIRED CURRENT READY AGE +replicaset.apps/devops-go-68d65b674 2 2 2 22m +replicaset.apps/devops-go-84467c6d7c 0 0 0 46m +replicaset.apps/devops-python-5cf85bf7cd 3 3 3 22m +replicaset.apps/devops-python-d6c788997 0 0 0 46m +``` + +### kubectl get pods,svc -o wide +``` +NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES +pod/devops-go-68d65b674-ngczb 1/1 Running 0 22m 10.244.0.8 minikube <none> <none> +pod/devops-go-68d65b674-wt7xl 1/1 Running 0 22m 10.244.0.10 minikube <none> <none> +pod/devops-python-5cf85bf7cd-dkgc6 1/1 Running 0 22m 10.244.0.9 minikube <none> <none> +pod/devops-python-5cf85bf7cd-r5vzr 1/1 Running 0 21m 10.244.0.13 minikube <none> <none> +pod/devops-python-5cf85bf7cd-rhg68 1/1 Running 0 21m 10.244.0.14 minikube <none> <none> + +NAME TYPE CLUSTER-IP EXTERNAL-IP PORT(S) AGE SELECTOR +service/devops-go-service NodePort 10.103.11.114 <none> 80:30081/TCP 46m app=devops-go +service/devops-python-service NodePort 10.109.104.29 <none> 80:30080/TCP 46m app=devops-python +service/kubernetes ClusterIP 10.96.0.1 <none> 443/TCP 47m <none> +``` + +### kubectl describe deployment devops-python +``` +Name: devops-python +Namespace: default +CreationTimestamp: Thu, 26 Mar 2026 23:00:24 +0300 +Labels: app=devops-python + version=1.0.0 +Annotations: deployment.kubernetes.io/revision: 4 +Selector: app=devops-python +Replicas: 3 desired | 3 updated | 3 total | 3 available | 0 unavailable +StrategyType: RollingUpdate +MinReadySeconds: 0 +RollingUpdateStrategy: 0 max unavailable, 1 max surge +Pod Template: + Labels: app=devops-python + version=1.0.0 + Containers: + devops-python: + Image: mirana18/devops-info-service:latest + Port: 5001/TCP + Host Port: 0/TCP + Limits: + cpu: 200m + memory: 256Mi + Requests: + cpu: 100m + memory: 128Mi + Liveness: http-get http://:5001/health delay=10s timeout=1s period=10s #success=1 #failure=3 + Readiness: http-get http://:5001/health delay=5s timeout=1s period=5s #success=1 #failure=3 + Environment: + PORT: 5001 + HOST: 
0.0.0.0 + DEBUG: False +Conditions: + Type Status Reason + ---- ------ ------ + Available True MinimumReplicasAvailable + Progressing True NewReplicaSetAvailable +OldReplicaSets: devops-python-d6c788997 (0/0 replicas created) +NewReplicaSet: devops-python-5cf85bf7cd (3/3 replicas created) +Events: + Type Reason Age From Message + ---- ------ ---- ---- ------- + Normal ScalingReplicaSet 46m deployment-controller Scaled up replica set devops-python-d6c788997 from 0 to 3 + Normal ScalingReplicaSet 22m deployment-controller Scaled up replica set devops-python-5cf85bf7cd from 0 to 1 + Normal ScalingReplicaSet 22m deployment-controller Scaled down replica set devops-python-d6c788997 from 3 to 2 + Normal ScalingReplicaSet 22m deployment-controller Scaled up replica set devops-python-5cf85bf7cd from 1 to 2 + Normal ScalingReplicaSet 22m deployment-controller Scaled down replica set devops-python-d6c788997 from 2 to 1 + Normal ScalingReplicaSet 22m deployment-controller Scaled up replica set devops-python-5cf85bf7cd from 2 to 3 + Normal ScalingReplicaSet 21m deployment-controller Scaled up replica set devops-python-5cf85bf7cd from 3 to 5 + Normal ScalingReplicaSet 19m (x2 over 22m) deployment-controller Scaled down replica set devops-python-d6c788997 from 1 to 0 + Normal ScalingReplicaSet 19m deployment-controller Scaled down replica set devops-python-5cf85bf7cd from 5 to 3 +``` + +### curl — app is working +``` +$ curl http://localhost:8080/health +{"status":"healthy","timestamp":"2026-03-26T20:48:44.439160.000Z","uptime_seconds":1457.41} + +$ curl http://localhost:8080/ +{"endpoints":[{"description":"Service information","method":"GET","path":"/"},{"description":"Health check","method":"GET","path":"/health"}],"request":{"client_ip":"127.0.0.1","method":"GET","path":"/","user_agent":"curl/8.7.1"},"runtime":{"current_time":"2026-03-26T20:48:44.462315+00:00","timezone":"UTC","uptime_human":"0 hours, 24 minutes, 17 
seconds","uptime_seconds":1457.43},"service":{"description":"DevOps course info service","framework":"Flask","name":"devops-info-service","version":"1.0.0"},"system":{"architecture":"aarch64","cpu_count":11,"hostname":"devops-python-5cf85bf7cd-dkgc6","platform":"Linux","platform_version":"6.12.67-linuxkit","python_version":"3.13.12"}} +``` + +--- + +## Operations Performed + +### Deploy all resources +```bash +kubectl apply -f k8s/deployment.yml +kubectl apply -f k8s/service.yml +kubectl apply -f k8s/deployment-go.yml +kubectl apply -f k8s/service-go.yml +``` + +Output: +``` +deployment.apps/devops-python created +service/devops-python-service created +deployment.apps/devops-go created +service/devops-go-service created +``` + +### Access the app +```bash +kubectl port-forward service/devops-python-service 8080:80 +curl http://localhost:8080/health +``` + +Output: +``` +{"status":"healthy","timestamp":"2026-03-26T20:48:44.439160.000Z","uptime_seconds":1457.41} +``` + +### Scale to 5 replicas +```bash +kubectl scale deployment/devops-python --replicas=5 +kubectl get pods +``` + +Output: +``` +deployment.apps/devops-python scaled + +NAME READY STATUS RESTARTS AGE +devops-go-68d65b674-ngczb 1/1 Running 0 23m +devops-go-68d65b674-wt7xl 1/1 Running 0 22m +devops-python-5cf85bf7cd-dkgc6 1/1 Running 0 23m +devops-python-5cf85bf7cd-r5vzr 1/1 Running 0 22m +devops-python-5cf85bf7cd-rb7ld 1/1 Running 0 8s +devops-python-5cf85bf7cd-rhg68 1/1 Running 0 22m +devops-python-5cf85bf7cd-ttxb2 1/1 Running 0 8s +``` + +### Rolling update +```bash +kubectl apply -f k8s/deployment.yml +kubectl rollout status deployment/devops-python +``` + +Output: +``` +deployment.apps/devops-python configured +deployment "devops-python" successfully rolled out +``` + +### View rollout history and rollback +```bash +kubectl rollout history deployment/devops-python +kubectl rollout undo deployment/devops-python +kubectl rollout status deployment/devops-python +``` + +Output: +``` +REVISION CHANGE-CAUSE 
+3 +4 + +deployment.apps/devops-python rolled back +deployment "devops-python" successfully rolled out +``` + +### Bonus — Ingress with TLS + +```bash +minikube addons enable ingress +openssl req -x509 -nodes -days 365 -newkey rsa:2048 \ + -keyout tls.key -out tls.crt \ + -subj "/CN=devops.local/O=devops.local" +kubectl create secret tls devops-tls-secret --key tls.key --cert tls.crt +kubectl apply -f k8s/ingress.yml +kubectl get ingress +``` + +Output: +``` +NAME CLASS HOSTS ADDRESS PORTS AGE +devops-ingress nginx devops.local 192.168.49.2 80, 443 10m +``` + +```bash +kubectl port-forward -n ingress-nginx service/ingress-nginx-controller 8443:443 +curl -sk --resolve "devops.local:8443:127.0.0.1" https://devops.local:8443/app1/health +curl -sk --resolve "devops.local:8443:127.0.0.1" https://devops.local:8443/app2/health +``` + +Output: +``` +# /app1 → Python app +{"status":"healthy","timestamp":"2026-03-26T20:49:00.910020+00:00","uptime_seconds":1473.88,...,"service":{"framework":"Flask","name":"devops-info-service",...}} + +# /app2 → Go app +{"service":{"name":"devops-info-service","framework":"Go net/http",...},"runtime":{"uptime_seconds":1466.32,...}} +``` + +--- + +## Production Considerations + +### Health checks +- **Liveness probe** on `/health` restarts stuck or crashed containers automatically. +- **Readiness probe** ensures no traffic is sent before the app finishes starting up. This matters for Flask since it has a startup phase. +- `initialDelaySeconds: 10` gives the app time to initialize before the first check. + +### Resource limits rationale +- **Requests** define the minimum guaranteed resources for scheduling. Without them the scheduler cannot place pods efficiently. +- **Limits** protect other pods on the same node from resource starvation caused by a misbehaving container. +- Python: `256Mi` RAM limit is safe for Flask + prometheus_client. Go needs far less (`128Mi`) due to lower runtime overhead. 
+ +### Production improvements +- Use a specific image tag (e.g., `v1.2.3`) instead of `latest` to make deployments reproducible and auditable. +- Add a `PodDisruptionBudget` to guarantee minimum available replicas during node maintenance. +- Use a `HorizontalPodAutoscaler` (HPA) based on CPU/memory metrics for automatic scaling. +- Store secrets (API keys, DB passwords) in Kubernetes `Secret` resources, not as plain-text env values in the manifest. +- Tune `livenessProbe.failureThreshold`: the current value of `3` means a restart after 3 failed checks × 10s = 30s, which is reasonable. + +### Monitoring and observability +- The Python app already exposes `/metrics` in Prometheus format — connect a Prometheus + Grafana stack (as in Lab 7/8). +- Use `kubectl logs -f POD_NAME` for live log streaming. +- Integrate with a log aggregator (Loki/Fluentd) for centralized logging across replicas. + +--- + +## Challenges & Solutions + +### Challenge 1: Image pull errors (ErrImagePull / ImagePullBackOff) +Images were built only for `linux/amd64`, but minikube on Apple Silicon (arm64) requires `linux/arm64`. + +```bash +kubectl describe pod POD_NAME +# Events showed: no matching manifest for linux/arm64/v8 in the manifest list entries +``` + +Fix: build images directly inside minikube's Docker daemon using `eval $(minikube docker-env)` and set `imagePullPolicy: Never`. + +```bash +eval $(minikube docker-env) +docker build --platform linux/arm64 -t mirana18/devops-info-service:latest ./app_python/ +``` + +### Challenge 2: Pods not ready +If the readiness probe fails, the pod stays in the `0/1 Running` state. Debug: + +```bash +kubectl logs POD_NAME +kubectl describe pod POD_NAME +# check Events and Last State sections +``` + +### Challenge 3: NodePort not accessible on macOS Docker driver +minikube with the Docker driver on macOS doesn't expose the node IP directly to the host.
+ +Fix: use `kubectl port-forward` instead: +```bash +kubectl port-forward service/devops-python-service 8080:80 +``` + +### Challenge 4: Ingress — use port-forward instead of direct IP +The same networking limitation as in Challenge 3 applies to the ingress controller address. Use: +```bash +kubectl port-forward -n ingress-nginx service/ingress-nginx-controller 8443:443 +curl -sk --resolve "devops.local:8443:127.0.0.1" https://devops.local:8443/app1/health +``` + +### What I learned +- Kubernetes reconciliation loop: you describe the desired state, and the controllers continuously work to make the actual state match it. +- Labels are the glue between Deployments and Services — the Service selector must match the pod template labels exactly. +- `RollingUpdate` with `maxUnavailable: 0` and `maxSurge: 1` keeps every desired replica serving during an update, because a new pod must become ready before an old one is removed. +- Probes are not optional in production — without them, traffic can reach a container that is not ready yet. +- On Apple Silicon, Docker images must be built for `linux/arm64` or use multi-arch manifests. +- Ingress provides L7 routing (host/path-based) and TLS termination — much more flexible than NodePort.
diff --git a/k8s/deployment-go.yml b/k8s/deployment-go.yml new file mode 100644 index 0000000000..6814673718 --- /dev/null +++ b/k8s/deployment-go.yml @@ -0,0 +1,53 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-go + labels: + app: devops-go + version: "1.0.0" +spec: + replicas: 2 + selector: + matchLabels: + app: devops-go + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: devops-go + version: "1.0.0" + spec: + containers: + - name: devops-go + image: mirana18/devops-info-service-go:latest + imagePullPolicy: Never + ports: + - containerPort: 8080 + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" + env: + - name: PORT + value: "8080" + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 10 + periodSeconds: 10 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 3 diff --git a/k8s/deployment.yml b/k8s/deployment.yml new file mode 100644 index 0000000000..ccdd909662 --- /dev/null +++ b/k8s/deployment.yml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-python + labels: + app: devops-python + version: "1.0.0" +spec: + replicas: 3 + selector: + matchLabels: + app: devops-python + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: devops-python + version: "1.0.0" + spec: + containers: + - name: devops-python + image: mirana18/devops-info-service:latest + imagePullPolicy: Never + ports: + - containerPort: 5001 + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "200m" + env: + - name: PORT + value: "5001" + - name: HOST + value: "0.0.0.0" + - name: DEBUG + value: "False" + livenessProbe: + httpGet: + path: /health + port: 5001 + initialDelaySeconds: 10 + periodSeconds: 10 
+ failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 5001 + initialDelaySeconds: 5 + periodSeconds: 5 + failureThreshold: 3 diff --git a/k8s/ingress.yml b/k8s/ingress.yml new file mode 100644 index 0000000000..78ef02f4cf --- /dev/null +++ b/k8s/ingress.yml @@ -0,0 +1,30 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: devops-ingress + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + ingressClassName: nginx + tls: + - hosts: + - devops.local + secretName: devops-tls-secret + rules: + - host: devops.local + http: + paths: + - path: /app1 + pathType: Prefix + backend: + service: + name: devops-python-service + port: + number: 80 + - path: /app2 + pathType: Prefix + backend: + service: + name: devops-go-service + port: + number: 80 diff --git a/k8s/service-go.yml b/k8s/service-go.yml new file mode 100644 index 0000000000..e4fabede8e --- /dev/null +++ b/k8s/service-go.yml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-go-service + labels: + app: devops-go +spec: + type: NodePort + selector: + app: devops-go + ports: + - protocol: TCP + port: 80 + targetPort: 8080 + nodePort: 30081 diff --git a/k8s/service.yml b/k8s/service.yml new file mode 100644 index 0000000000..c6ca3e3c9c --- /dev/null +++ b/k8s/service.yml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-python-service + labels: + app: devops-python +spec: + type: NodePort + selector: + app: devops-python + ports: + - protocol: TCP + port: 80 + targetPort: 5001 + nodePort: 30080 diff --git a/monitoring/.env.example b/monitoring/.env.example new file mode 100644 index 0000000000..8f46a9fa02 --- /dev/null +++ b/monitoring/.env.example @@ -0,0 +1,3 @@ +# Copy to .env and set your values (do not commit .env) +DOCKERHUB_USERNAME=your-dockerhub-username +GF_SECURITY_ADMIN_PASSWORD=your-grafana-admin-password diff --git a/monitoring/docker-compose.yml b/monitoring/docker-compose.yml new file mode 100644 
index 0000000000..7f26c649a1 --- /dev/null +++ b/monitoring/docker-compose.yml @@ -0,0 +1,175 @@ +services: + prometheus: + image: prom/prometheus:v3.10.0 + container_name: prometheus + command: + - '--config.file=/etc/prometheus/prometheus.yml' + - '--storage.tsdb.retention.time=15d' + - '--storage.tsdb.retention.size=10GB' + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + + loki: + image: grafana/loki:3.0.0 + container_name: loki + user: "0:0" + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + labels: + logging: "promtail" + app: "loki" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/var/loki + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '1.0' + memory: 1G + reservations: + cpus: '0.25' + memory: 256M + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + command: -config.file=/etc/promtail/config.yml + labels: + logging: "promtail" + app: "promtail" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + restart: unless-stopped + deploy: + resources: + limits: + cpus: '0.5' + memory: 512M + reservations: + cpus: '0.1' + memory: 128M + + grafana: + image: grafana/grafana:12.3.1 + container_name: grafana + ports: + - "3000:3000" + labels: + 
logging: "promtail" + app: "grafana" + environment: + - GF_AUTH_ANONYMOUS_ENABLED=false + - GF_SECURITY_ALLOW_EMBEDDING=true + - GF_SERVER_ROOT_URL=http://localhost:3000 + - GF_USERS_ALLOW_SIGN_UP=false + - GF_SECURITY_ADMIN_PASSWORD=${GF_SECURITY_ADMIN_PASSWORD:-admin} + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "wget -q -O- http://localhost:3000/api/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 15s + deploy: + resources: + limits: + cpus: '1.0' + memory: 512M + reservations: + cpus: '0.25' + memory: 256M + + app-python: + build: + context: ../app_python + dockerfile: Dockerfile + image: ${DOCKERHUB_USERNAME:-your-dockerhub-username}/devops-info-service:latest + platform: linux/amd64 + container_name: devops-python + ports: + - "8000:5001" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + restart: unless-stopped + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:5001/health || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + + app-go: + build: + context: ../app_go + dockerfile: Dockerfile + image: ${DOCKERHUB_USERNAME:-your-dockerhub-username}/devops-info-service-go:latest + platform: linux/amd64 + container_name: devops-go + ports: + - "8001:8080" + networks: + - logging + labels: + logging: "promtail" + app: "devops-go" + restart: unless-stopped + deploy: + resources: + limits: + cpus: '0.5' + memory: 256M + +networks: + logging: + driver: bridge + +volumes: + loki-data: + grafana-data: + prometheus-data: diff --git a/monitoring/docs/1-1773943196684.json b/monitoring/docs/1-1773943196684.json new file mode 100644 index 0000000000..4574ccc1da --- /dev/null +++ b/monitoring/docs/1-1773943196684.json @@ -0,0 +1,588 @@ +{ + "annotations": { + "list": [ + { + "builtIn": 1, + "datasource": { + "type": "grafana", + "uid": 
"-- Grafana --" + }, + "enable": true, + "hide": true, + "iconColor": "rgba(0, 211, 255, 1)", + "name": "Annotations & Alerts", + "type": "dashboard" + } + ] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": 2, + "links": [], + "panels": [ + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 0 + }, + "id": 6, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (status_code) (rate(http_requests_total[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Status code distribution", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + 
"axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 0 + }, + "id": 3, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "editorMode": "code", + "expr": "histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Latency p95 (s)", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": 
{ + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 8 + }, + "id": 5, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "editorMode": "code", + "expr": "http_requests_in_progress", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Active requests", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 8 + }, + "id": 2, + "options": { + "legend": { + 
"calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "editorMode": "code", + "expr": "up{job=\"app\"}", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Uptime", + "type": "timeseries" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "custom": { + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "scaleDistribution": { + "type": "linear" + } + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 16 + }, + "id": 4, + "options": { + "calculate": false, + "cellGap": 1, + "color": { + "exponent": 0.5, + "fill": "dark-orange", + "mode": "scheme", + "reverse": false, + "scale": "exponential", + "scheme": "Oranges", + "steps": 64 + }, + "exemplars": { + "color": "rgba(255,0,255,0.7)" + }, + "filterValues": { + "le": 1e-9 + }, + "legend": { + "show": true + }, + "rowsFrame": { + "layout": "auto" + }, + "tooltip": { + "mode": "single", + "showColorScale": false, + "yHistogram": false + }, + "yAxis": { + "axisPlacement": "left", + "reverse": false + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "editorMode": "code", + "expr": "sum by (le) (rate(http_request_duration_seconds_bucket[5m]))", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Latency heatmap", + "type": "heatmap" + }, + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisBorderShow": false, + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "barWidthFactor": 0.6, + "drawStyle": "line", + "fillOpacity": 0, + "gradientMode": "none", + "hideFrom": { + 
"legend": false, + "tooltip": false, + "viz": false + }, + "insertNulls": false, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "showValues": false, + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": 0 + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 16 + }, + "id": 1, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "hideZeros": false, + "mode": "single", + "sort": "none" + } + }, + "pluginVersion": "12.3.1", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "dfgazp8lw9ddsb" + }, + "editorMode": "code", + "expr": "sum(rate(http_requests_total[5m])) by (endpoint)", + "legendFormat": "__auto", + "range": true, + "refId": "A" + } + ], + "title": "Request rate (req/s)", + "type": "timeseries" + } + ], + "preload": false, + "schemaVersion": 42, + "tags": [], + "templating": { + "list": [] + }, + "time": { + "from": "2026-03-19T17:11:20.268Z", + "to": "2026-03-19T17:55:20.600Z" + }, + "timepicker": {}, + "timezone": "browser", + "title": "1", + "uid": "adg6qdf", + "version": 1 +} \ No newline at end of file diff --git a/monitoring/docs/LAB07.md b/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..5d8f911fe5 --- /dev/null +++ b/monitoring/docs/LAB07.md @@ -0,0 +1,195 @@ +# Lab 7: Observability & Logging with Loki Stack + +## 1. 
Architecture + +``` + ┌─────────────┐ + │ Grafana │ :3000 + │ (UI/Explore)│ + └──────┬──────┘ + │ queries + ┌──────▼──────┐ + │ Loki │ :3100 + │ (log store) │ + └──────▲──────┘ + │ push + ┌──────┴──────┐ + │ Promtail │ :9080 + │ (collector) │ + └──────▲──────┘ + │ read container logs + ┌─────────────────┼─────────────────┐ + │ │ │ + ┌─────▼─────┐ ┌──────▼──────┐ ┌──────▼──────┐ + │ app-python│ │ app-go │ │ other │ + │ :8000 │ │ :8001 │ │ containers │ + └───────────┘ └─────────────┘ └─────────────┘ +``` + +- **Loki** — stores logs (TSDB + filesystem), retention 7 days. +- **Promtail** — discovers Docker containers with label `logging=promtail`, reads logs, sends to Loki. +- **Grafana** — data source Loki, Explore and dashboards. + +--- + +## 2. Setup Guide + +```bash +cd monitoring +cp .env.example .env +# Edit .env: set DOCKERHUB_USERNAME and GF_SECURITY_ADMIN_PASSWORD + +docker compose up -d +docker compose ps +``` + +**Verify:** + +```bash +curl http://localhost:3100/ready # Loki +curl http://localhost:9080/targets # Promtail targets +open http://localhost:3000 # Grafana (login: admin / password from .env) +``` + +**Add Loki data source in Grafana:** Connections → Data sources → Add data source → Loki → URL `http://loki:3100` → Save & Test. + +--- + +## 3. Configuration + +### Loki (`loki/config.yml`) + +- **auth_enabled: false** — for lab; in production use auth. +- **server** — HTTP 3100, gRPC 9096. +- **schema_config** — v13, store tsdb, object_store filesystem (Loki 3.0). +- **storage_config** — filesystem for chunks, tsdb_shipper for index/cache. +- **limits_config** — **retention_period: 168h** (7 days). +- **compactor** — retention_enabled true, apply_retention_interval 10m. + +### Promtail (`promtail/config.yml`) + +- **clients** — push to `http://loki:3100/loki/api/v1/push`. +- **scrape_configs** — job `docker`, `docker_sd_configs` with filter `label=logging=promtail`. +- **relabel_configs** — `container` from container name, `app` from label `app`. 
+ +--- + +## 4. Application Logging + +Python app (Lab 1) was updated for **JSON logging**: + +- **Library:** `python-json-logger` (JsonFormatter). +- **Fields:** timestamp, level, name, message + extra (method, path, status_code, client_ip, duration_ms). +- **Hooks:** `@app.before_request` logs request start; `@app.after_request` logs completion with status and duration. +- **Events:** startup, each HTTP request/response, errors (logger.error with exc_info). + +Example log line: + +```json +{"timestamp": "2026-03-07 12:00:00,123", "level": "INFO", "message": "Request completed", "method": "GET", "path": "/health", "status_code": 200, "client_ip": "172.18.0.1", "duration_ms": 1.45} +``` + +--- + +## 5. Dashboard + +Create a dashboard in Grafana with 4 panels: + +| Panel | Type | LogQL | +|--------------------|------------|--------| +| Logs Table | Logs | `{app=~"devops-.*"}` | +| Request Rate | Time series| `sum by (app) (rate({app=~"devops-.*"} [1m]))` | +| Error Logs | Logs | `{app=~"devops-.*"} \| json \| level="ERROR"` | +| Log Level Distribution | Stat/Pie | `sum by (level) (count_over_time({app=~"devops-.*"} \| json [5m]))` | + +**Example LogQL:** + +- All logs: `{app="devops-python"}` +- Only errors: `{app="devops-python"} |= "ERROR"` +- Parse JSON: `{app="devops-python"} | json | method="GET"` + +--- + +## 6. Production Config + +- **Resource limits** — in `docker-compose.yml` for loki, promtail, grafana, apps (deploy.resources.limits/reservations). +- **Grafana security** — GF_AUTH_ANONYMOUS_ENABLED=false, admin password via GF_SECURITY_ADMIN_PASSWORD in .env (do not commit .env). +- **Health checks** — Loki: `/ready`, Grafana: `/api/health` (interval 10s, start_period 10–15s). + +--- + +## 7. 
Testing + +```bash +# Generate traffic +for i in {1..20}; do curl -s http://localhost:8000/ > /dev/null; done +for i in {1..20}; do curl -s http://localhost:8000/health > /dev/null; done +for i in {1..10}; do curl -s http://localhost:8001/ > /dev/null; done + +# In Grafana Explore: run queries above; confirm logs from devops-python and devops-go. +``` + +--- + +## 8. Challenges + +- **Loki 3.0 config** — schema v13 and tsdb storage required; compactor retention_enabled must be true when retention_period is set. +- **Promtail filter** — only containers with label `logging=promtail` are scraped; apps in compose have this label. +- **Grafana auth** — anonymous access disabled; set admin password in .env for production. + +--- + +## Bonus: Ansible Automation + +The Loki stack is automated with the Ansible role **monitoring** and playbook **deploy-monitoring.yml**. See `ansible/roles/monitoring/` and run: + +```bash +cd ansible +ansible-playbook playbooks/deploy-monitoring.yml --ask-vault-pass +``` + +The role uses templated configs (Loki, Promtail, docker-compose), deploys idempotently with `community.docker.docker_compose_v2`, and depends on the role **docker**. + +--- + +## Evidence + +**1. Grafana Explore — logs** + +![Grafana Explore](../screenshots/explore-logs.png) + +**2. JSON-log** + +![JSON log](../screenshots/json.png) + +**3. Grafana Explore — logs from both apps** + +![Logs both apps](../screenshots/explore-both-apps.png) + +**4. Grafana Explore — three LogQL queries** + +![3 LogQLs](../screenshots/3q.png) + +**5. Dashboard** + +![Dashboard 4 panels](../screenshots/dashboard.png) + +**6. docker compose ps** + +![docker compose ps](../screenshots/docker-ps.png) + +**7. Grafana login** + +![Grafana login](../screenshots/login.png) + +**8. Ansible playbook execution output** + +![Ansible playbook execution output](../screenshots/1.png) + +**9. Idempotency test** + +![Idempotency test](../screenshots/2.png) + +**10. 
Templated configuration files** + +![Templated configuration files](../screenshots/tcf.png) diff --git a/monitoring/docs/LAB08.md b/monitoring/docs/LAB08.md new file mode 100644 index 0000000000..8d6d74d7ff --- /dev/null +++ b/monitoring/docs/LAB08.md @@ -0,0 +1,92 @@ +# LAB08 — Metrics & Monitoring with Prometheus + +## Architecture + +Flow: + +- `app-python` exposes metrics on `GET /metrics` (Prometheus format) +- Prometheus scrapes targets every **15s** +- Grafana queries Prometheus to visualize metrics + +``` +app-python (/metrics) ---> Prometheus ---> Grafana dashboards + (logs) ---> Loki ---> Grafana logs +``` + +## Application Instrumentation + +### Implemented metrics (RED-style) + +- Counter: `http_requests_total{method,endpoint,status_code}` + - Total requests (rate) + errors (5xx) +- Histogram: `http_request_duration_seconds{method,endpoint}` + - Request latency distribution / p95 +- Gauge: `http_requests_in_progress` + - Concurrent requests (active in-flight) + +### Evidence +- Screenshot: `/metrics` output +![/metrics output](../screenshots/lab08-metrics.png) + +## Prometheus Configuration + +### Scrape targets + +- `prometheus` → `localhost:9090` +- `app` → `app-python:5001` (`/metrics`) +- `loki` → `loki:3100` (`/metrics`) +- `grafana` → `grafana:3000` (`/metrics`) + +### Retention + +- `--storage.tsdb.retention.time=15d` +- `--storage.tsdb.retention.size=10GB` + +### Evidence +- Screenshot: Prometheus `/targets` page with all targets **UP** +![Prometheus /targets page](../screenshots/lab08-prometheus-targets.png) + +## Grafana Dashboard + +Create a dashboard (named, for example, `DevOps Info Service — Metrics`) with the following panels: + +1. **Request rate (req/s)** + `sum(rate(http_requests_total[5m])) by (endpoint)` +2. **Latency p95 (s)** + `histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))` +3. **Latency heatmap** + `sum by (le) (rate(http_request_duration_seconds_bucket[5m]))` +4. **Active requests** + `http_requests_in_progress` +5. 
**Status code distribution** + `sum by (status_code) (rate(http_requests_total[5m]))` +6. **Uptime** + `up{job="app"}` + +### Evidence +- Screenshot: dashboard showing live data +![Grafana dashboard](../screenshots/lab08-grafana-dashboard.png) + + +## PromQL Examples + +- Up targets: `up` +- App up: `up{job="app"}` +- Total RPS: `sum(rate(http_requests_total[5m]))` +- RPS by endpoint: `sum by (endpoint) (rate(http_requests_total[5m]))` +- p95 latency: `histogram_quantile(0.95, sum by (le) (rate(http_request_duration_seconds_bucket[5m])))` + +## Production Setup + +- Healthchecks: Prometheus, Grafana, Loki, `app-python` (compose) +- Resource limits: configured in `monitoring/docker-compose.yml` +- Persistence: `prometheus-data`, `loki-data`, `grafana-data` + +## Testing Results + +1. `curl http://localhost:8000/metrics` returns Prometheus text format +2. Prometheus UI `http://localhost:9090/targets` shows all jobs **UP** +3. Grafana Prometheus datasource points to `http://prometheus:9090` +4. 
Dashboard panels show non-empty data after generating traffic + + diff --git a/monitoring/loki/config.yml b/monitoring/loki/config.yml new file mode 100644 index 0000000000..b1be316bb3 --- /dev/null +++ b/monitoring/loki/config.yml @@ -0,0 +1,52 @@ +auth_enabled: false + +server: + http_listen_address: 0.0.0.0 + http_listen_port: 3100 + grpc_listen_address: 0.0.0.0 + grpc_listen_port: 9096 + +common: + instance_addr: 0.0.0.0 + path_prefix: /var/loki + storage: + filesystem: + chunks_directory: /var/loki/chunks + rules_directory: /var/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + +query_scheduler: + max_outstanding_requests_per_tenant: 2048 + +schema_config: + configs: + - from: "2024-01-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: /var/loki/chunks + tsdb_shipper: + active_index_directory: /var/loki/tsdb-index + cache_location: /var/loki/tsdb-cache + +limits_config: + retention_period: 168h # 7 days + +compactor: + working_directory: /var/loki/compactor + retention_enabled: true + compaction_interval: 10m + apply_retention_interval: 10m + delete_request_store: filesystem + +analytics: + reporting_enabled: false diff --git a/monitoring/prometheus/prometheus.yml b/monitoring/prometheus/prometheus.yml new file mode 100644 index 0000000000..c71eeb9d8f --- /dev/null +++ b/monitoring/prometheus/prometheus.yml @@ -0,0 +1,24 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: 'prometheus' + static_configs: + - targets: ['localhost:9090'] + + - job_name: 'app' + metrics_path: '/metrics' + static_configs: + - targets: ['app-python:5001'] + + - job_name: 'loki' + metrics_path: '/metrics' + static_configs: + - targets: ['loki:3100'] + + - job_name: 'grafana' + metrics_path: '/metrics' + static_configs: + - targets: ['grafana:3000'] + diff --git a/monitoring/promtail/config.yml b/monitoring/promtail/config.yml 
new file mode 100644 index 0000000000..2735459d5b --- /dev/null +++ b/monitoring/promtail/config.yml @@ -0,0 +1,28 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ['__meta_docker_container_name'] + regex: '/(.*)' + target_label: 'container' + - source_labels: ['__meta_docker_container_log_stream'] + target_label: 'logstream' + - source_labels: ['__meta_docker_container_label_logging'] + target_label: 'logging' + - source_labels: ['__meta_docker_container_label_app'] + target_label: 'app' diff --git a/monitoring/screenshots/1.png b/monitoring/screenshots/1.png new file mode 100644 index 0000000000..4a40c57123 Binary files /dev/null and b/monitoring/screenshots/1.png differ diff --git a/monitoring/screenshots/2.png b/monitoring/screenshots/2.png new file mode 100644 index 0000000000..473662aba3 Binary files /dev/null and b/monitoring/screenshots/2.png differ diff --git a/monitoring/screenshots/3q.png b/monitoring/screenshots/3q.png new file mode 100644 index 0000000000..4977368e9c Binary files /dev/null and b/monitoring/screenshots/3q.png differ diff --git a/monitoring/screenshots/dashboard.png b/monitoring/screenshots/dashboard.png new file mode 100644 index 0000000000..550828bd37 Binary files /dev/null and b/monitoring/screenshots/dashboard.png differ diff --git a/monitoring/screenshots/docker-ps.png b/monitoring/screenshots/docker-ps.png new file mode 100644 index 0000000000..648b1913ae Binary files /dev/null and b/monitoring/screenshots/docker-ps.png differ diff --git a/monitoring/screenshots/explore-both-apps.png b/monitoring/screenshots/explore-both-apps.png new file mode 100644 index 0000000000..62728a14f1 Binary files /dev/null and 
b/monitoring/screenshots/explore-both-apps.png differ diff --git a/monitoring/screenshots/explore-logs.png b/monitoring/screenshots/explore-logs.png new file mode 100644 index 0000000000..6f7aaa3a48 Binary files /dev/null and b/monitoring/screenshots/explore-logs.png differ diff --git a/monitoring/screenshots/json.png b/monitoring/screenshots/json.png new file mode 100644 index 0000000000..e613250ad8 Binary files /dev/null and b/monitoring/screenshots/json.png differ diff --git a/monitoring/screenshots/lab08-grafana-dashboard.png b/monitoring/screenshots/lab08-grafana-dashboard.png new file mode 100644 index 0000000000..e996bb6261 Binary files /dev/null and b/monitoring/screenshots/lab08-grafana-dashboard.png differ diff --git a/monitoring/screenshots/lab08-metrics.png b/monitoring/screenshots/lab08-metrics.png new file mode 100644 index 0000000000..6b1fc1bb80 Binary files /dev/null and b/monitoring/screenshots/lab08-metrics.png differ diff --git a/monitoring/screenshots/lab08-prometheus-targets.png b/monitoring/screenshots/lab08-prometheus-targets.png new file mode 100644 index 0000000000..b3dfbbb45c Binary files /dev/null and b/monitoring/screenshots/lab08-prometheus-targets.png differ diff --git a/monitoring/screenshots/login.png b/monitoring/screenshots/login.png new file mode 100644 index 0000000000..6c7f89f558 Binary files /dev/null and b/monitoring/screenshots/login.png differ diff --git a/monitoring/screenshots/tcf.png b/monitoring/screenshots/tcf.png new file mode 100644 index 0000000000..e423af391c Binary files /dev/null and b/monitoring/screenshots/tcf.png differ diff --git a/pulumi/.gitignore b/pulumi/.gitignore new file mode 100644 index 0000000000..a3807e5bdb --- /dev/null +++ b/pulumi/.gitignore @@ -0,0 +1,2 @@ +*.pyc +venv/ diff --git a/pulumi/Pulumi.dev.yaml b/pulumi/Pulumi.dev.yaml new file mode 100644 index 0000000000..eeb3086a76 --- /dev/null +++ b/pulumi/Pulumi.dev.yaml @@ -0,0 +1,4 @@ +config: + yandex:serviceAccountKeyFile: 
/Users/arinazimina/Downloads/authorized_key(1).json + yandex:folderId: b1gff0j67atu07bsqe14 + python_pulumi:sshPublicKey: ssh-ed25519 AAAAC3NzaC1lZDI1NTE5AAAAIJrdGukPSOFXySoBrNeDTwqafjO8lx2IrM0GyzSycpDN arinazimina@arino4ka diff --git a/pulumi/Pulumi.yaml b/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..7c52e3f280 --- /dev/null +++ b/pulumi/Pulumi.yaml @@ -0,0 +1,11 @@ +name: python_pulumi +description: A minimal Python Pulumi program +runtime: + name: python + options: + toolchain: pip + virtualenv: venv +config: + pulumi:tags: + value: + pulumi:template: python diff --git a/pulumi/__main__.py b/pulumi/__main__.py new file mode 100644 index 0000000000..df82788543 --- /dev/null +++ b/pulumi/__main__.py @@ -0,0 +1,141 @@ +""" +Yandex Cloud resources via Pulumi. +Auth: either set token or service account key file before running: + pulumi config set yandex:token YOUR_TOKEN --secret + pulumi config set yandex:folderId YOUR_FOLDER_ID + # or key file: + pulumi config set yandex:serviceAccountKeyFile /path/to/key.json +""" +import pulumi +import pulumi_yandex as yandex + +config = pulumi.Config("yandex") +folder_id = config.require("folderId") + +ssh_public_key = pulumi.Config().get("sshPublicKey") or "" + +# --------------------------- +# Network +# --------------------------- +network = yandex.VpcNetwork( + "lab-network", + folder_id=folder_id, +) + +subnet = yandex.VpcSubnet( + "lab-subnet", + folder_id=folder_id, + zone="ru-central1-a", + network_id=network.id, + v4_cidr_blocks=["10.0.0.0/24"], +) + +# --------------------------- +# Security Group +# --------------------------- +sg = yandex.VpcSecurityGroup( + "lab-sg", + folder_id=folder_id, + network_id=network.id, +) + +# --------------------------- +# Security Group Rules +# --------------------------- +yandex.VpcSecurityGroupRule( + "ssh-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=22, + v4_cidr_blocks=["0.0.0.0/0"] +) + +yandex.VpcSecurityGroupRule( + 
"http-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=80, + v4_cidr_blocks=["0.0.0.0/0"] +) + +yandex.VpcSecurityGroupRule( + "app-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=5000, + v4_cidr_blocks=["0.0.0.0/0"] +) + +# App port 5001 (devops-info-service, legacy) +yandex.VpcSecurityGroupRule( + "app-5001-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=5001, + v4_cidr_blocks=["0.0.0.0/0"], +) + +# App port 8000 (Lab 6: Python app) +yandex.VpcSecurityGroupRule( + "app-8000-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=8000, + v4_cidr_blocks=["0.0.0.0/0"], +) + +# App port 8001 (Lab 6 Bonus: Go app) +yandex.VpcSecurityGroupRule( + "app-8001-rule", + security_group_binding=sg.id, + direction="ingress", + protocol="TCP", + port=8001, + v4_cidr_blocks=["0.0.0.0/0"], +) + +# Egress: allow VM to reach internet (apt, Docker Hub, etc.) +yandex.VpcSecurityGroupRule( + "egress-all", + security_group_binding=sg.id, + direction="egress", + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], +) + +# --------------------------- +# VM +# --------------------------- +vm_metadata = {"ssh-keys": f"ubuntu:{ssh_public_key}"} if ssh_public_key else None +vm = yandex.ComputeInstance( + "lab-vm", + folder_id=folder_id, + zone="ru-central1-a", + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + memory=2, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id="fd80293ig2816a78q276", # Ubuntu 22.04 LTS + ), + ), + metadata=vm_metadata, + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + security_group_ids=[sg.id], + ) + ], +) + +# --------------------------- +# Outputs +# --------------------------- +pulumi.export("public_ip", vm.network_interfaces[0].nat_ip_address) \ No newline at end of file 
diff --git a/pulumi/requirements.txt b/pulumi/requirements.txt new file mode 100644 index 0000000000..4fcd3c0981 --- /dev/null +++ b/pulumi/requirements.txt @@ -0,0 +1,2 @@ +pulumi>=3.0.0,<4.0.0 +pulumi-yandex diff --git a/ydb_terraform/.gitignore b/ydb_terraform/.gitignore new file mode 100644 index 0000000000..82c68586e6 --- /dev/null +++ b/ydb_terraform/.gitignore @@ -0,0 +1,6 @@ +*.tfstate +*.tfstate.* +.terraform/ +terraform.tfvars +*.pem +*.key diff --git a/ydb_terraform/.terraform.lock.hcl b/ydb_terraform/.terraform.lock.hcl new file mode 100644 index 0000000000..690c5bbdd3 --- /dev/null +++ b/ydb_terraform/.terraform.lock.hcl @@ -0,0 +1,9 @@ +# This file is maintained automatically by "terraform init". +# Manual edits may be lost in future updates. + +provider "registry.terraform.io/yandex-cloud/yandex" { + version = "0.187.0" + hashes = [ + "h1:+uf4EBRLDwNYIvZsGK/ZUzN3sGzJaXcUngyYSIJoyyQ=", + ] +} diff --git a/ydb_terraform/main.tf b/ydb_terraform/main.tf new file mode 100644 index 0000000000..1a243f437f --- /dev/null +++ b/ydb_terraform/main.tf @@ -0,0 +1,66 @@ +resource "yandex_vpc_network" "network" { + name = "terraform-network" +} + +resource "yandex_vpc_subnet" "subnet" { + name = "terraform-subnet" + zone = var.zone + network_id = yandex_vpc_network.network.id + v4_cidr_blocks = ["10.0.0.0/24"] +} + +resource "yandex_vpc_security_group" "sg" { + name = "terraform-sg" + network_id = yandex_vpc_network.network.id + + ingress { + protocol = "TCP" + description = "SSH" + v4_cidr_blocks = ["0.0.0.0/0"] + port = 22 + } + + ingress { + protocol = "TCP" + description = "HTTP" + v4_cidr_blocks = ["0.0.0.0/0"] + port = 80 + } + + ingress { + protocol = "TCP" + description = "App 5000" + v4_cidr_blocks = ["0.0.0.0/0"] + port = 5000 + } + + egress { + protocol = "ANY" + v4_cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "yandex_compute_instance" "vm" { + name = var.vm_name + + resources { + cores = 2 + memory = 2 + } + + boot_disk { + initialize_params { + 
image_id = var.image_id + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.subnet.id + nat = true + security_group_ids = [yandex_vpc_security_group.sg.id] + } + + metadata = { + ssh-keys = "${var.ssh_user}:${file(var.public_key_path)}" + } +} diff --git a/ydb_terraform/outputs.tf b/ydb_terraform/outputs.tf new file mode 100644 index 0000000000..ad6e3a5b26 --- /dev/null +++ b/ydb_terraform/outputs.tf @@ -0,0 +1,4 @@ +output "vm_public_ip" { + description = "Public IP address" + value = yandex_compute_instance.vm.network_interface[0].nat_ip_address +} diff --git a/ydb_terraform/provider.tf b/ydb_terraform/provider.tf new file mode 100644 index 0000000000..9514396fda --- /dev/null +++ b/ydb_terraform/provider.tf @@ -0,0 +1,14 @@ +terraform { + required_providers { + yandex = { + source = "yandex-cloud/yandex" + version = "~> 0.180" + } + } +} + +provider "yandex" { + cloud_id = var.cloud_id + folder_id = var.folder_id + zone = var.zone +} diff --git a/ydb_terraform/variables.tf b/ydb_terraform/variables.tf new file mode 100644 index 0000000000..cf983f6e90 --- /dev/null +++ b/ydb_terraform/variables.tf @@ -0,0 +1,37 @@ +variable "cloud_id" { + description = "Yandex Cloud ID" + type = string +} + +variable "folder_id" { + description = "Yandex Folder ID" + type = string +} + +variable "zone" { + description = "Zone" + type = string + default = "ru-central1-a" +} + +variable "vm_name" { + description = "VM name" + type = string + default = "terraform-vm" +} + +variable "image_id" { + description = "Ubuntu image ID" + type = string +} + +variable "ssh_user" { + description = "SSH user" + type = string + default = "ubuntu" +} + +variable "public_key_path" { + description = "Path to SSH public key" + type = string +}