diff --git a/.github/workflows/ansible-deploy-bonus.yml b/.github/workflows/ansible-deploy-bonus.yml new file mode 100644 index 0000000000..0c66229ccb --- /dev/null +++ b/.github/workflows/ansible-deploy-bonus.yml @@ -0,0 +1,94 @@ +--- +name: Ansible Deploy Bonus (Go App) + +on: + push: + branches: [master] + paths: + - "labs-work/ansible/roles/web_app/**" + - "labs-work/ansible/vars/app_bonus.yml" + - "labs-work/ansible/playbooks/deploy_bonus.yml" + - "labs-work/ansible/playbooks/deploy_all.yml" + pull_request: + branches: [master] + paths: + - "labs-work/ansible/roles/web_app/**" + - "labs-work/ansible/vars/app_bonus.yml" + - "labs-work/ansible/playbooks/deploy_bonus.yml" + - "labs-work/ansible/playbooks/deploy_all.yml" + workflow_dispatch: + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and ansible-lint + run: | + pip install ansible ansible-lint + + - name: Run ansible-lint + working-directory: labs-work/ansible + run: | + ansible-lint playbooks/deploy_bonus.yml + + deploy: + name: Deploy Go Application + runs-on: ubuntu-latest + needs: lint + if: github.ref == 'refs/heads/master' + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible + run: | + pip install ansible + + - name: Setup SSH key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/devops-lab04 + chmod 600 ~/.ssh/devops-lab04 + ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts 2>/dev/null + + - name: Write vault password file + run: | + echo "${{ secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass + chmod 600 /tmp/vault_pass + + - name: Update inventory with VM host + working-directory: labs-work/ansible + run: | + sed -i 
"s/ansible_host=.*/ansible_host=${{ secrets.VM_HOST }}/" inventory/hosts.ini + + - name: Run deployment playbook + working-directory: labs-work/ansible + run: | + ansible-playbook playbooks/deploy_bonus.yml \ + --vault-password-file /tmp/vault_pass \ + --private-key ~/.ssh/devops-lab04 \ + --tags app_deploy + + - name: Verify deployment + run: | + sleep 5 + curl -f http://${{ secrets.VM_HOST }}:8001/health + + - name: Cleanup vault password file + if: always() + run: | + rm -f /tmp/vault_pass diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..462cb5afb6 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,90 @@ +--- +name: Ansible Deploy + +on: + push: + branches: [master] + paths: + - "labs-work/ansible/**" + - "!labs-work/ansible/docs/**" + pull_request: + branches: [master] + paths: + - "labs-work/ansible/**" + - "!labs-work/ansible/docs/**" + workflow_dispatch: + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and ansible-lint + run: | + pip install ansible ansible-lint + + - name: Run ansible-lint + working-directory: labs-work/ansible + run: | + ansible-lint playbooks/*.yml + + deploy: + name: Deploy Application + runs-on: ubuntu-latest + needs: lint + if: github.ref == 'refs/heads/master' + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible + run: | + pip install ansible + + - name: Setup SSH key + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/devops-lab04 + chmod 600 ~/.ssh/devops-lab04 + ssh-keyscan -H ${{ secrets.VM_HOST }} >> ~/.ssh/known_hosts 2>/dev/null + + - name: Write vault password file + run: | + echo "${{ 
secrets.ANSIBLE_VAULT_PASSWORD }}" > /tmp/vault_pass + chmod 600 /tmp/vault_pass + + - name: Update inventory with VM host + working-directory: labs-work/ansible + run: | + sed -i "s/ansible_host=.*/ansible_host=${{ secrets.VM_HOST }}/" inventory/hosts.ini + + - name: Run deployment playbook + working-directory: labs-work/ansible + run: | + ansible-playbook playbooks/deploy.yml \ + --vault-password-file /tmp/vault_pass \ + --private-key ~/.ssh/devops-lab04 \ + --tags app_deploy + + - name: Verify deployment + run: | + sleep 5 + curl -f http://${{ secrets.VM_HOST }}:5000/health + + - name: Cleanup vault password file + if: always() + run: | + rm -f /tmp/vault_pass diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 0000000000..239691e911 --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,99 @@ +name: Go CI + +on: + push: + branches: [main, master] + paths: + - 'labs-work/app_go/**' + - '.github/workflows/go-ci.yml' + pull_request: + branches: [main, master] + paths: + - 'labs-work/app_go/**' + - '.github/workflows/go-ci.yml' + +env: + DOCKER_IMAGE: mashfeii/devops-info-service-go + GO_VERSION: '1.21' + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + defaults: + run: + working-directory: labs-work/app_go + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: ${{ env.GO_VERSION }} + cache-dependency-path: labs-work/app_go/go.mod + + - name: Run golangci-lint + uses: golangci/golangci-lint-action@v4 + with: + working-directory: labs-work/app_go + version: latest + + - name: Run tests with coverage + run: | + go test -v -coverprofile=coverage.out -covermode=atomic ./... 
+ go tool cover -func=coverage.out + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: labs-work/app_go/coverage.out + flags: go + token: ${{ secrets.CODECOV_TOKEN }} + if: always() + + build: + name: Build and Push Docker + runs-on: ubuntu-latest + needs: test + if: github.event_name == 'push' && github.ref == 'refs/heads/master' + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate CalVer version + id: version + run: | + echo "VERSION=$(date +%Y.%m.%d)" >> $GITHUB_OUTPUT + echo "SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_IMAGE }} + tags: | + type=raw,value=${{ steps.version.outputs.VERSION }} + type=raw,value=latest + type=sha,prefix=,format=short + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: labs-work/app_go + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..3614c7e196 --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,130 @@ +name: Python CI + +on: + push: + branches: [main, master] + paths: + - 'labs-work/app_python/**' + - '.github/workflows/python-ci.yml' + pull_request: + branches: [main, master] + paths: + - 'labs-work/app_python/**' + - '.github/workflows/python-ci.yml' + +env: + DOCKER_IMAGE: mashfeii/devops-info-service + PYTHON_VERSION: '3.13' + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + defaults: + run: + working-directory: labs-work/app_python 
+ + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + cache: 'pip' + cache-dependency-path: | + labs-work/app_python/requirements.txt + labs-work/app_python/requirements-dev.txt + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt + pip install -r requirements-dev.txt + + - name: Lint with ruff + run: ruff check . --output-format=github + + - name: Run tests with coverage + run: pytest --cov=. --cov-report=xml --cov-report=term --cov-fail-under=70 + + - name: Upload coverage to Codecov + uses: codecov/codecov-action@v4 + with: + files: labs-work/app_python/coverage.xml + flags: python + token: ${{ secrets.CODECOV_TOKEN }} + if: always() + + security: + name: Security Scan + runs-on: ubuntu-latest + needs: test + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ env.PYTHON_VERSION }} + + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install -r labs-work/app_python/requirements.txt + + - name: Run Snyk to check for vulnerabilities + uses: snyk/actions/python@master + env: + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + with: + args: --file=labs-work/app_python/requirements.txt --severity-threshold=high + continue-on-error: true + + build: + name: Build and Push Docker + runs-on: ubuntu-latest + needs: [test, security] + if: github.event_name == 'push' && github.ref == 'refs/heads/master' + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Generate CalVer version + id: version + run: | + echo 
"VERSION=$(date +%Y.%m.%d)" >> $GITHUB_OUTPUT + echo "SHA_SHORT=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT + + - name: Docker meta + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ env.DOCKER_IMAGE }} + tags: | + type=raw,value=${{ steps.version.outputs.VERSION }} + type=raw,value=latest + type=sha,prefix=,format=short + + - name: Build and push Docker image + uses: docker/build-push-action@v6 + with: + context: labs-work/app_python + push: true + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/terraform-ci.yml b/.github/workflows/terraform-ci.yml new file mode 100644 index 0000000000..c81da36524 --- /dev/null +++ b/.github/workflows/terraform-ci.yml @@ -0,0 +1,52 @@ +name: Terraform CI + +on: + push: + branches: [main, master] + paths: + - 'labs-work/terraform/**' + - '.github/workflows/terraform-ci.yml' + pull_request: + branches: [main, master] + paths: + - 'labs-work/terraform/**' + - '.github/workflows/terraform-ci.yml' + +env: + TERRAFORM_VERSION: '1.9' + TF_WORKING_DIR: labs-work/terraform + +jobs: + validate: + name: Validate Terraform + runs-on: ubuntu-latest + defaults: + run: + working-directory: labs-work/terraform + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: ${{ env.TERRAFORM_VERSION }} + + - name: Setup TFLint + uses: terraform-linters/setup-tflint@v4 + + - name: Check formatting + run: terraform fmt -check -recursive + + - name: Initialize Terraform + run: terraform init -backend=false + + - name: Validate configuration + run: terraform validate + + - name: Initialize TFLint + run: tflint --init + + - name: Run TFLint + run: tflint --format compact diff --git a/.gitignore b/.gitignore index 30d74d2584..4a66d6744c 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,36 @@ -test \ No newline at end of 
file +*.exe + +.vscode/ +.idea/ + +.DS_Store + +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +crash.log +crash.*.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Pulumi +pulumi/venv/ +pulumi/.venv/ +labs-work/pulumi/venv/ +labs-work/pulumi/.venv/ +Pulumi.*.yaml + +# Cloud credentials +*.pem +*.key + +# Ansible +*.retry +.vault_pass +__pycache__/ diff --git a/labs-work/ansible/ansible.cfg b/labs-work/ansible/ansible.cfg new file mode 100644 index 0000000000..0ddcbf1672 --- /dev/null +++ b/labs-work/ansible/ansible.cfg @@ -0,0 +1,11 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False + +[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/labs-work/ansible/docs/LAB05.md b/labs-work/ansible/docs/LAB05.md new file mode 100644 index 0000000000..e9544565f5 --- /dev/null +++ b/labs-work/ansible/docs/LAB05.md @@ -0,0 +1,266 @@ +# Lab 05 - Ansible Fundamentals + +## Overview + +- Automating VM provisioning and application deployment using Ansible +- Building role-based configuration with common, Docker, and app deployment roles +- Implementing idempotent playbooks for repeatable infrastructure management +- Using Ansible Vault for secure credential management +- Bonus: Dynamic inventory with Yandex Cloud plugin + +## Architecture Overview + +### Tool Versions + +| Tool | Version | +| ----------------- | ----------------------------------- | +| Ansible | 2.16+ | +| Target OS | Ubuntu 24.04 LTS | +| Docker CE | Latest | +| Python (target) | 3.x | +| Application Image | mashfeii/devops-info-service:latest | + +### Role Structure + +``` +labs-work/ansible/ +├── ansible.cfg +├── inventory/ +│ ├── hosts.ini # Static inventory (fill in VM IP) +│ ├── yandex_cloud.py # Bonus: dynamic inventory script +│ └── group_vars/ +│ └── all.yml # Vault-encrypted credentials +├── 
roles/ +│ ├── common/ # System packages and timezone +│ │ ├── tasks/main.yml +│ │ └── defaults/main.yml +│ ├── docker/ # Docker CE installation +│ │ ├── tasks/main.yml +│ │ ├── handlers/main.yml +│ │ └── defaults/main.yml +│ └── app_deploy/ # Container deployment +│ ├── tasks/main.yml +│ ├── handlers/main.yml +│ └── defaults/main.yml +├── playbooks/ +│ ├── site.yml # Full pipeline (provision + deploy) +│ ├── provision.yml # System setup (common + docker) +│ └── deploy.yml # App deployment only +└── docs/ + └── LAB05.md +``` + +### Network Architecture + +``` +User -> http://<VM_IP>:5000 -> Docker (host:5000 -> container:5173) -> Flask App +``` + +The host port 5000 is mapped to container port 5173 (Flask app's listening port). Port 5000 is already allowed in the Yandex Cloud security group from Lab 04. + +## Roles Documentation + +### Common Role + +**Purpose:** Prepare the base system with essential packages and correct timezone + +**Variables:** + +| Variable | Default | Description | +| ----------------- | ------- | -------------------------- | +| `common_packages` | list | System packages to install | +| `common_timezone` | `UTC` | Server timezone | + +**Tasks:** + +1. Update apt cache (skips if updated within last hour via `cache_valid_time: 3600`) +2. Install common packages (`state: present` - idempotent, no reinstall if present) +3. Set timezone using `community.general.timezone` + +**Idempotency:** The `cache_valid_time` parameter avoids redundant apt updates. Package installation with `state: present` is a no-op when packages are already installed. 
+ +### Docker Role + +**Purpose:** Install Docker CE from official repository using modern GPG key management + +**Variables:** + +| Variable | Default | Description | +| --------------------- | ------------------------------ | --------------------------- | +| `docker_user` | `ubuntu` | User added to docker group | +| `docker_packages` | list | Docker packages to install | +| `docker_gpg_url` | Docker official URL | GPG key download URL | +| `docker_gpg_key_path` | `/etc/apt/keyrings/docker.asc` | GPG key storage path | +| `docker_arch` | auto-detected | System architecture mapping | + +**Tasks:** + +1. Create `/etc/apt/keyrings` directory +2. Download Docker GPG key via `get_url` (not deprecated `apt_key`) +3. Add Docker apt repository with `signed-by` parameter +4. Install Docker packages +5. Ensure Docker service is started and enabled +6. Add user to docker group +7. Install `python3-docker` via apt (required for `community.docker` modules, installed via apt to comply with Ubuntu 24.04 PEP 668 externally-managed restriction) + +**Handler:** Restart docker - triggered when Docker packages are installed or updated + +**Dependencies:** Common role (for `ca-certificates`, `gnupg`, `curl`) + +### App Deploy Role + +**Purpose:** Pull and run the Flask application as a Docker container with health verification + +**Variables:** + +| Variable | Default | Description | +| -------------------- | ------------------------------ | --------------------------- | +| `app_host_port` | `5000` | Port exposed on the host | +| `app_container_port` | `5173` | Flask app port in container | +| `app_image` | `mashfeii/devops-info-service` | Docker image name | +| `app_image_tag` | `latest` | Image tag | +| `app_container_name` | `devops-info-service` | Container name | +| `app_restart_policy` | `unless-stopped` | Docker restart policy | + +**Tasks:** + +1. Log in to Docker Hub (`no_log: true` to hide credentials) +2. 
Pull application image (`force_source: true` to always check for updates) +3. Run container with port mapping `5000:5173` and restart policy +4. Wait for port 5000 to be available +5. Health check at `/health` endpoint with retries + +**Handler:** Restart app container - can be triggered by configuration changes + +**Dependencies:** Docker role (Docker must be installed and running) + +## Idempotency Demonstration + +### First Run + +![provision-first-run](screenshots/provision-first-run.png) + +### Second Run (Idempotency Proof) + +![provision-second-run](screenshots/provision-second-run.png) + +Key indicators of idempotency: + +- `cache_valid_time: 3600` prevents repeated apt updates +- `state: present` does not reinstall existing packages +- Docker GPG key download is skipped if file exists with correct mode +- Docker service enable/start is a no-op when already running + +## Ansible Vault Usage + +### Encryption Process + +The `group_vars/all.yml` file contains Docker Hub credentials and is encrypted with Ansible Vault before committing. + +```bash +cd labs-work/ansible +ansible-vault encrypt inventory/group_vars/all.yml +``` + +### Encryption Evidence + +![vault-encrypted](screenshots/vault-encrypted.png) + +### Using Vault in Playbooks + +Playbooks that need vault variables are run with `--ask-vault-pass`: + +```bash +ansible-playbook playbooks/deploy.yml --ask-vault-pass +``` + +## Deployment Verification + +### Running Containers + +![docker-ps](screenshots/docker-ps.png) + +### Health Check + +![health-check](screenshots/health-check.png) + +### Deploy Playbook Output + +![deploy-output](screenshots/deploy-output.png) + +## Key Decisions + +### Why Role-Based Structure Instead of a Single Playbook + +Roles provide logical separation of concerns: system preparation (common), container runtime (docker), and application deployment (app_deploy). Each role can be developed, tested, and reused independently. 
The provision and deploy playbooks compose these roles differently, allowing infrastructure setup without redeploying the app, or redeploying without reprovisioning. This mirrors real-world separation where different teams may own different layers. + +### Why `python3-docker` via apt Instead of pip + +Ubuntu 24.04 enforces PEP 668, marking the system Python as "externally managed." Running `pip install docker` outside a virtual environment fails with an error. Since Ansible runs system-wide (not in a venv), the `python3-docker` apt package is the correct way to provide the Python Docker SDK. This avoids `--break-system-packages` hacks and keeps the system package manager as the single source of truth for system-level dependencies. + +### Why `get_url` Instead of `apt_key` for Docker GPG Key + +The `apt_key` module is deprecated in Ansible because the underlying `apt-key` command is deprecated in modern Debian/Ubuntu. The modern approach stores GPG keys in `/etc/apt/keyrings/` and references them via the `signed-by` parameter in the repository definition. This is more secure (per-repository key scope) and future-proof. + +### Why Host Port 5000 Maps to Container Port 5173 + +The Flask application inside the container listens on port 5173 (as configured in Lab 01). The Yandex Cloud security group from Lab 04 already allows ingress on port 5000. Rather than modifying the security group or the application's internal port, the Docker port mapping bridges the two: external traffic arrives on 5000 and is forwarded to 5173 inside the container. + +### Why `force_source: true` on Image Pull + +Without `force_source`, the `docker_image` module skips pulling if an image with the same name:tag exists locally. Since we use the `latest` tag, the local image could be stale. `force_source: true` ensures every deployment checks Docker Hub for a newer version of the image, guaranteeing the most recent build is always deployed. 
+ +## Bonus: Dynamic Inventory + +### Script-Based Approach + +There is no official `yandex.cloud` Ansible Galaxy collection, so the dynamic inventory uses a Python script (`inventory/yandex_cloud.py`) that calls the `yc` CLI directly. + +**Prerequisites:** + +1. `yc` CLI installed and authenticated (`yc init`) +2. Set `YC_FOLDER_ID` environment variable: `export YC_FOLDER_ID="your-folder-id"` +3. Authenticate: `export YC_TOKEN=$(yc iam create-token)` + +**How it works:** + +- Calls `yc compute instance list` to discover VMs +- Filters for `RUNNING` instances only +- Extracts the NAT IP address as `ansible_host` +- Groups VMs by their `project` label (VMs labeled `project=devops-course` go into the `webservers` group) +- Uses VM name as the hostname +- Supports the standard `--list` and `--host` Ansible inventory interface + +### Usage + +```bash +# Set required env vars +export YC_FOLDER_ID="your-folder-id" +export YC_TOKEN=$(yc iam create-token) + +# List discovered hosts +ansible-inventory -i inventory/yandex_cloud.py --list + +# Use with playbooks +ansible-playbook -i inventory/yandex_cloud.py playbooks/site.yml --ask-vault-pass +``` + +![dynamic-inventory](screenshots/dynamic-inventory.png) +![dynamic-inventory-playbook](screenshots/dynamic-inventory-playbook.png) + +## Challenges and Solutions + +### Challenge 1: PEP 668 Blocking pip Install on Ubuntu 24.04 + +**Problem:** Attempting to install the Docker Python SDK via `pip install docker` on Ubuntu 24.04 fails because the system Python is marked as "externally managed" per PEP 668 +**Solution:** Used `ansible.builtin.apt` to install `python3-docker` from Ubuntu's package repository instead of pip, which provides the same Docker SDK without violating the externally-managed environment restriction + +### Challenge 2: Docker GPG Key Management + +**Problem:** The traditional `apt_key` approach is deprecated and Ansible's `apt_key` module shows deprecation warnings +**Solution:** Downloaded the GPG key to 
`/etc/apt/keyrings/docker.asc` using `get_url` and referenced it via the `signed-by` parameter in the apt repository definition, following Docker's own installation documentation for modern Ubuntu + +### Challenge 3: Ensuring Idempotent Apt Cache Updates + +**Problem:** Running `apt update` on every playbook execution wastes time and bandwidth, especially during development with frequent re-runs +**Solution:** Used `cache_valid_time: 3600` parameter on the apt module, which only refreshes the cache if the last update was more than one hour ago, making repeated runs fast without risking stale package lists diff --git a/labs-work/ansible/docs/LAB06.md b/labs-work/ansible/docs/LAB06.md new file mode 100644 index 0000000000..7b1835ea60 --- /dev/null +++ b/labs-work/ansible/docs/LAB06.md @@ -0,0 +1,264 @@ +# Lab 06 - Advanced Ansible: Blocks, Tags, Docker Compose & CI/CD + +## Overview + +- Refactored roles with blocks (error handling) and tags (selective execution) +- Migrated from `docker_container` to Docker Compose with Jinja2 templates +- Added wipe logic for clean app removal and redeployment +- Created CI/CD pipeline with GitHub Actions for linting and deployment +- Bonus: multi-app deployment for Python and Go services + +## Architecture Overview + +### Tool Versions + +| Tool | Version | +| ---------------- | -------------------------------------- | +| Ansible | 2.16+ | +| Docker Compose | v2 (plugin) | +| Target OS | Ubuntu 24.04 LTS | +| Python App Image | mashfeii/devops-info-service:latest | +| Go App Image | mashfeii/devops-info-service-go:latest | + +### Role Structure + +``` +labs-work/ansible/ +├── ansible.cfg +├── inventory/ +│ ├── hosts.ini +│ └── group_vars/ +│ └── all.yml # Vault-encrypted credentials +├── vars/ +│ ├── app_python.yml # Python app variables +│ └── app_bonus.yml # Go app variables (bonus) +├── roles/ +│ ├── common/ # System packages with blocks+tags +│ │ ├── tasks/main.yml +│ │ └── defaults/main.yml +│ ├── docker/ # Docker CE with 
blocks+tags +│ │ ├── tasks/main.yml +│ │ ├── handlers/main.yml +│ │ └── defaults/main.yml +│ └── web_app/ # Docker Compose deployment +│ ├── tasks/main.yml +│ ├── tasks/wipe.yml +│ ├── handlers/main.yml +│ ├── defaults/main.yml +│ ├── meta/main.yml # Docker role dependency +│ └── templates/ +│ └── docker-compose.yml.j2 +├── playbooks/ +│ ├── site.yml # Full pipeline +│ ├── provision.yml # System setup +│ ├── deploy.yml # Default app deployment +│ ├── deploy_python.yml # Python-specific deployment +│ ├── deploy_bonus.yml # Go app deployment (bonus) +│ └── deploy_all.yml # Deploy both apps +└── docs/ + └── LAB06.md +``` + +## Blocks and Error Handling + +Blocks group related tasks and add error handling with `rescue` (runs on failure) and `always` (runs regardless) sections. + +### Common Role + +Two blocks: + +**packages** (`tags: packages, common`) - apt cache update, package install, timezone setup + +- Rescue: retries apt update, then retries packages +- Always: writes log to `/tmp/ansible-common-complete.log` + +**users** (`tags: users, common`) - verifies primary system user exists + +![provision-blocks](screenshots/provision-blocks.png) + +### Docker Role + +Two blocks: + +**docker_install** (`tags: docker_install, docker`) - GPG key, apt repo, package install + +- Rescue: pauses 10s, retries apt update and install +- Always: ensures Docker service is running and enabled + +**docker_config** (`tags: docker_config, docker`) - adds user to docker group, installs python3-docker + +![docker-blocks](screenshots/docker-blocks.png) + +## Tags + +| Tag | Scope | Description | +| ---------------- | ------------ | --------------------------------- | +| `packages` | common role | System package installation | +| `users` | common role | User management | +| `common` | common role | All common role tasks | +| `docker_install` | docker role | Docker CE installation | +| `docker_config` | docker role | Docker post-install configuration | +| `docker` | docker role | All 
docker role tasks | +| `app_deploy` | web_app role | Application deployment | +| `compose` | web_app role | Docker Compose operations | +| `web_app_wipe` | web_app role | Application removal | + +```bash +# Selective execution examples +ansible-playbook playbooks/provision.yml --tags packages +ansible-playbook playbooks/provision.yml --tags docker_install +ansible-playbook playbooks/deploy.yml --tags app_deploy --ask-vault-pass +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe --ask-vault-pass +ansible-playbook playbooks/deploy.yml --list-tags +``` + +![list-tags](screenshots/list-tags.png) +![tags-selective-run](screenshots/tags-selective-run.png) + +## Docker Compose Migration + +Replaced `community.docker.docker_container` with `docker_compose_v2` module. Benefits: + +- Declarative service definitions via templated `docker-compose.yml` +- Built-in `docker compose down` for clean removal +- Reusable template across multiple apps (bonus task) + +### Template + +```yaml +services: + {{ web_app_name }}: + image: {{ web_app_image }}:{{ web_app_tag }} + container_name: {{ web_app_name }} + restart: {{ web_app_restart_policy }} + ports: + - "{{ web_app_port }}:{{ web_app_internal_port }}" + labels: + managed-by: ansible +``` + +No `version:` key - obsolete in Compose v2 spec. 
+ +### Variables + +| Variable | Default Value | Description | +| ----------------------- | ---------------------------------------------- | -------------------------- | +| `web_app_name` | `devops-info-service` | Container and service name | +| `web_app_image` | `{{ dockerhub_username }}/devops-info-service` | Docker image reference | +| `web_app_tag` | `latest` | Image tag | +| `web_app_port` | `5000` | Host port | +| `web_app_internal_port` | `5173` | Container port | +| `web_app_compose_dir` | `/opt/{{ web_app_name }}` | Compose project directory | +| `web_app_restart_policy`| `unless-stopped` | Container restart policy | +| `web_app_wipe` | `false` | Enable wipe mode | + +### Role Dependencies + +`web_app` declares `docker` as a dependency in `meta/main.yml`, so `deploy.yml` auto-ensures Docker is installed. + +## Deployment + +![deploy-output](screenshots/deploy-output.png) + +![docker-ps](screenshots/docker-ps.png) + +![health-check](screenshots/health-check.png) + +## Wipe Logic + +Double-gated cleanup: controlled by `web_app_wipe` variable (default: `false`) and `web_app_wipe` tag. Uses `failed_when: false` on compose down for idempotency. + +Steps: `docker compose down` -> remove compose file -> remove app directory. + +```bash +# Wipe only +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe --ask-vault-pass + +# Clean reinstall (wipe + deploy) +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --ask-vault-pass +``` + +![wipe-output](screenshots/wipe-output.png) + +![wipe-redeploy](screenshots/wipe-redeploy.png) +![wipe-redeploy-result](screenshots/wipe-redeploy-result.png) + +## CI/CD Integration + +Two GitHub Actions workflows with `workflow_dispatch` for manual triggers: + +**ansible-deploy.yml** (Python app) - triggers on `labs-work/ansible/**` changes (excluding docs). Lint job runs `ansible-lint`, deploy job runs playbook and verifies health. 
+ +**ansible-deploy-bonus.yml** (Go app) - triggers on `web_app` role and bonus file changes. Same pattern, verifies on port 8001. + +### Required GitHub Secrets + +| Secret | Description | +| ------------------------ | ---------------------------------- | +| `ANSIBLE_VAULT_PASSWORD` | Password to decrypt Ansible Vault | +| `SSH_PRIVATE_KEY` | SSH private key for VM access | +| `VM_HOST` | Public IP address of the target VM | + +![ci-workflow-run](screenshots/ci-workflow-run.png) + +![ci-deploy-logs](screenshots/ci-deploy-logs.png) + +## Key Decisions + +| Decision | Rationale | +| -------------------------------- | ---------------------------------------------------- | +| Rename `app_deploy` to `web_app` | Reflects Docker Compose web service pattern | +| Port 5000:5173 preserved | Matches existing Yandex Cloud security group rules | +| Go app on port 8001 | Avoids conflict with Python app on 5000 | +| `include_tasks` for wipe | Allows tag filtering without running wipe by default | +| `pull: always` in compose | Ensures latest image on every deploy | +| Role meta dependency | No need to list docker role in deploy playbooks | + +## Bonus: Multi-Application Deployment + +The `web_app` role is reused for both apps by overriding variables: + +| Variable | Python App | Go App | +| ---------------------- | ------------------------------ | --------------------------------- | +| `web_app_name` | `devops-info-service` | `devops-info-service-go` | +| `web_app_image` | `mashfeii/devops-info-service` | `mashfeii/devops-info-service-go` | +| `web_app_port` | `5000` | `8001` | +| `web_app_internal_port`| `5173` | `8080` | +| `web_app_compose_dir` | `/opt/devops-info-service` | `/opt/devops-info-service-go` | + +Playbooks: `deploy_python.yml`, `deploy_bonus.yml` (individual), `deploy_all.yml` (both). 
+ +```bash +ansible-playbook playbooks/deploy_python.yml --ask-vault-pass +ansible-playbook playbooks/deploy_bonus.yml --ask-vault-pass +ansible-playbook playbooks/deploy_all.yml --ask-vault-pass +ansible-playbook playbooks/deploy_bonus.yml -e "web_app_wipe=true" --tags web_app_wipe --ask-vault-pass +``` + +![deploy-all-output](screenshots/deploy-all-output.png) +![deploy-all-output-result](screenshots/deploy-all-output-result.png) + +![docker-ps-both](screenshots/docker-ps-both.png) + +The VM IP changed because I forgot to open port 8001 and had to re-apply Terraform. +![health-check-both](screenshots/health-check-both.png) + +![ci-bonus-deploy-logs](screenshots/ci-bonus-deploy-logs.png) + +## Challenges and Solutions + +**Problem:** `include_tasks` with tags needs tags on both the include statement and the included file + +**Solution:** Added `apply: tags` on `include_tasks` and matching tags on the block inside `wipe.yml` + +--- + +**Problem:** `docker compose down` fails if compose file doesn't exist yet + +**Solution:** `failed_when: false` on compose down task for idempotent wipe + +--- + +**Problem:** Role meta dependencies run docker role even with only wipe tags selected + +**Solution:** Acceptable - Docker must be present for `docker compose down`, and the role is idempotent diff --git a/labs-work/ansible/docs/screenshots/ci-bonus-deploy-logs.png b/labs-work/ansible/docs/screenshots/ci-bonus-deploy-logs.png new file mode 100644 index 0000000000..08cc363fd0 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/ci-bonus-deploy-logs.png differ diff --git a/labs-work/ansible/docs/screenshots/ci-deploy-logs.png b/labs-work/ansible/docs/screenshots/ci-deploy-logs.png new file mode 100644 index 0000000000..637ef10d33 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/ci-deploy-logs.png differ diff --git a/labs-work/ansible/docs/screenshots/ci-workflow-run.png b/labs-work/ansible/docs/screenshots/ci-workflow-run.png new file mode 100644
index 0000000000..5d35f638c2 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/ci-workflow-run.png differ diff --git a/labs-work/ansible/docs/screenshots/deploy-all-output-result.png b/labs-work/ansible/docs/screenshots/deploy-all-output-result.png new file mode 100644 index 0000000000..806a387739 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/deploy-all-output-result.png differ diff --git a/labs-work/ansible/docs/screenshots/deploy-all-output.png b/labs-work/ansible/docs/screenshots/deploy-all-output.png new file mode 100644 index 0000000000..95bea56156 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/deploy-all-output.png differ diff --git a/labs-work/ansible/docs/screenshots/deploy-output.png b/labs-work/ansible/docs/screenshots/deploy-output.png new file mode 100644 index 0000000000..53461a77ed Binary files /dev/null and b/labs-work/ansible/docs/screenshots/deploy-output.png differ diff --git a/labs-work/ansible/docs/screenshots/docker-blocks.png b/labs-work/ansible/docs/screenshots/docker-blocks.png new file mode 100644 index 0000000000..dd030f8d5e Binary files /dev/null and b/labs-work/ansible/docs/screenshots/docker-blocks.png differ diff --git a/labs-work/ansible/docs/screenshots/docker-ps-both.png b/labs-work/ansible/docs/screenshots/docker-ps-both.png new file mode 100644 index 0000000000..5dc7a33a10 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/docker-ps-both.png differ diff --git a/labs-work/ansible/docs/screenshots/docker-ps.png b/labs-work/ansible/docs/screenshots/docker-ps.png new file mode 100644 index 0000000000..1a6a7ed99a Binary files /dev/null and b/labs-work/ansible/docs/screenshots/docker-ps.png differ diff --git a/labs-work/ansible/docs/screenshots/dynamic-inventory-playbook.png b/labs-work/ansible/docs/screenshots/dynamic-inventory-playbook.png new file mode 100644 index 0000000000..d4db1f8e88 Binary files /dev/null and 
b/labs-work/ansible/docs/screenshots/dynamic-inventory-playbook.png differ diff --git a/labs-work/ansible/docs/screenshots/dynamic-inventory.png b/labs-work/ansible/docs/screenshots/dynamic-inventory.png new file mode 100644 index 0000000000..d88f801e60 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/dynamic-inventory.png differ diff --git a/labs-work/ansible/docs/screenshots/health-check-both.png b/labs-work/ansible/docs/screenshots/health-check-both.png new file mode 100644 index 0000000000..63f922a750 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/health-check-both.png differ diff --git a/labs-work/ansible/docs/screenshots/health-check.png b/labs-work/ansible/docs/screenshots/health-check.png new file mode 100644 index 0000000000..a8f0b06727 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/health-check.png differ diff --git a/labs-work/ansible/docs/screenshots/list-tags.png b/labs-work/ansible/docs/screenshots/list-tags.png new file mode 100644 index 0000000000..32cf6daf6a Binary files /dev/null and b/labs-work/ansible/docs/screenshots/list-tags.png differ diff --git a/labs-work/ansible/docs/screenshots/provision-blocks.png b/labs-work/ansible/docs/screenshots/provision-blocks.png new file mode 100644 index 0000000000..3d9ee76a69 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/provision-blocks.png differ diff --git a/labs-work/ansible/docs/screenshots/provision-first-run.png b/labs-work/ansible/docs/screenshots/provision-first-run.png new file mode 100644 index 0000000000..3cffd016a5 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/provision-first-run.png differ diff --git a/labs-work/ansible/docs/screenshots/provision-second-run.png b/labs-work/ansible/docs/screenshots/provision-second-run.png new file mode 100644 index 0000000000..e5e2dbd0a5 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/provision-second-run.png differ diff --git 
a/labs-work/ansible/docs/screenshots/tags-selective-run.png b/labs-work/ansible/docs/screenshots/tags-selective-run.png new file mode 100644 index 0000000000..fb45c00767 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/tags-selective-run.png differ diff --git a/labs-work/ansible/docs/screenshots/vault-encrypted.png b/labs-work/ansible/docs/screenshots/vault-encrypted.png new file mode 100644 index 0000000000..244e14f3b6 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/vault-encrypted.png differ diff --git a/labs-work/ansible/docs/screenshots/wipe-output.png b/labs-work/ansible/docs/screenshots/wipe-output.png new file mode 100644 index 0000000000..3b8f577de9 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/wipe-output.png differ diff --git a/labs-work/ansible/docs/screenshots/wipe-redeploy-result.png b/labs-work/ansible/docs/screenshots/wipe-redeploy-result.png new file mode 100644 index 0000000000..8aa52ba30e Binary files /dev/null and b/labs-work/ansible/docs/screenshots/wipe-redeploy-result.png differ diff --git a/labs-work/ansible/docs/screenshots/wipe-redeploy.png b/labs-work/ansible/docs/screenshots/wipe-redeploy.png new file mode 100644 index 0000000000..1329a62203 Binary files /dev/null and b/labs-work/ansible/docs/screenshots/wipe-redeploy.png differ diff --git a/labs-work/ansible/inventory/group_vars/all.yml b/labs-work/ansible/inventory/group_vars/all.yml new file mode 100644 index 0000000000..9014c104f3 --- /dev/null +++ b/labs-work/ansible/inventory/group_vars/all.yml @@ -0,0 +1,15 @@ +$ANSIBLE_VAULT;1.1;AES256 +39653162376166343831633062643132636436396465316462663636386531666139343134363761 +3630323563373632333537346466373133613532626263630a343039383030333139336239326162 +35643764303663346262383833353666316636653061633030653536303033323164353663356663 +3261653464363139660a626163623835616230386533313561616132316636616362643364653864 
+34623761633535376166356135316632376666666133636237396663376338326231363862393064 +30366238623039303437366564633064636339313665366162623132306565636136646537653838 +35666139326437336239363564663931373766313438313262396166633836643065636335333431 +61383562333837643539323739616236316332323066653130643938313433316335316564636134 +64313932343038326664663534363062323065393939313430303661613764356134646632373536 +61653238643861633966336130323936373664616136663737653162343531653436626636333230 +31643435366235626231656330383232373262616436636432613561666562353935346264396132 +61306562633939623337626636356333306261353039313834316432303132313865343331376432 +32633338393765653534313736373833393431316636613565363038336331643165333037333062 +3838633364633934386136363939623164643830306561346432 diff --git a/labs-work/ansible/inventory/hosts.ini b/labs-work/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..99493a4526 --- /dev/null +++ b/labs-work/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +devops-vm ansible_host=89.169.150.106 ansible_user=ubuntu ansible_ssh_private_key_file=~/.ssh/devops-lab04 + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/labs-work/ansible/inventory/yandex_cloud.py b/labs-work/ansible/inventory/yandex_cloud.py new file mode 100755 index 0000000000..36b21a1784 --- /dev/null +++ b/labs-work/ansible/inventory/yandex_cloud.py @@ -0,0 +1,81 @@ +#!/usr/bin/env python3 + +import json +import os +import subprocess +import sys + + +FOLDER_ID = os.environ.get("YC_FOLDER_ID", "") + + +def yc_list_instances(): + cmd = [ + "yc", "compute", "instance", "list", + "--folder-id", FOLDER_ID, + "--format", "json", + ] + try: + result = subprocess.run(cmd, capture_output=True, text=True, check=True) + return json.loads(result.stdout) + except FileNotFoundError: + print("Error: 'yc' CLI not found. 
Install it first.", file=sys.stderr) + sys.exit(1) + except subprocess.CalledProcessError as e: + print(f"Error calling yc: {e.stderr}", file=sys.stderr) + sys.exit(1) + + +def build_inventory(instances): + inventory = { + "_meta": {"hostvars": {}}, + "all": {"children": ["ungrouped", "webservers"]}, + "webservers": {"hosts": []}, + "ungrouped": {"hosts": []}, + } + + for inst in instances: + if inst.get("status") != "RUNNING": + continue + + name = inst.get("name", inst["id"]) + labels = inst.get("labels", {}) + + # Extract public NAT IP + nat_ip = None + for iface in inst.get("network_interfaces", []): + addr = iface.get("primary_v4_address", {}) + nat = addr.get("one_to_one_nat", {}) + nat_ip = nat.get("address") + if nat_ip: + break + + if not nat_ip: + continue + + inventory["_meta"]["hostvars"][name] = { + "ansible_host": nat_ip, + "ansible_user": "ubuntu", + "ansible_ssh_private_key_file": "~/.ssh/devops-lab04", + } + + if labels.get("project") == "devops-course": + inventory["webservers"]["hosts"].append(name) + else: + inventory["ungrouped"]["hosts"].append(name) + + return inventory + + +def main(): + if len(sys.argv) == 2 and sys.argv[1] == "--list": + instances = yc_list_instances() + print(json.dumps(build_inventory(instances), indent=2)) + elif len(sys.argv) == 2 and sys.argv[1] == "--host": + print(json.dumps({})) + else: + print(json.dumps({"_meta": {"hostvars": {}}})) + + +if __name__ == "__main__": + main() diff --git a/labs-work/ansible/playbooks/deploy-monitoring.yml b/labs-work/ansible/playbooks/deploy-monitoring.yml new file mode 100644 index 0000000000..1e0da410e7 --- /dev/null +++ b/labs-work/ansible/playbooks/deploy-monitoring.yml @@ -0,0 +1,6 @@ +--- +- name: Deploy monitoring stack + hosts: webservers + become: true + roles: + - monitoring diff --git a/labs-work/ansible/playbooks/deploy.yml b/labs-work/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..df2e9c5067 --- /dev/null +++ b/labs-work/ansible/playbooks/deploy.yml 
@@ -0,0 +1,6 @@ +--- +- name: Deploy application + hosts: webservers + become: true + roles: + - web_app diff --git a/labs-work/ansible/playbooks/deploy_all.yml b/labs-work/ansible/playbooks/deploy_all.yml new file mode 100644 index 0000000000..8c3ac2cfb7 --- /dev/null +++ b/labs-work/ansible/playbooks/deploy_all.yml @@ -0,0 +1,16 @@ +--- +- name: Deploy Python application + hosts: webservers + become: true + vars_files: + - ../vars/app_python.yml + roles: + - web_app + +- name: Deploy Go application + hosts: webservers + become: true + vars_files: + - ../vars/app_bonus.yml + roles: + - web_app diff --git a/labs-work/ansible/playbooks/deploy_bonus.yml b/labs-work/ansible/playbooks/deploy_bonus.yml new file mode 100644 index 0000000000..d54fb59113 --- /dev/null +++ b/labs-work/ansible/playbooks/deploy_bonus.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Go application + hosts: webservers + become: true + vars_files: + - ../vars/app_bonus.yml + roles: + - web_app diff --git a/labs-work/ansible/playbooks/deploy_python.yml b/labs-work/ansible/playbooks/deploy_python.yml new file mode 100644 index 0000000000..7fb00a73d5 --- /dev/null +++ b/labs-work/ansible/playbooks/deploy_python.yml @@ -0,0 +1,8 @@ +--- +- name: Deploy Python application + hosts: webservers + become: true + vars_files: + - ../vars/app_python.yml + roles: + - web_app diff --git a/labs-work/ansible/playbooks/provision.yml b/labs-work/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..e56fe03786 --- /dev/null +++ b/labs-work/ansible/playbooks/provision.yml @@ -0,0 +1,7 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + roles: + - common + - docker diff --git a/labs-work/ansible/playbooks/site.yml b/labs-work/ansible/playbooks/site.yml new file mode 100644 index 0000000000..d6d51bff0a --- /dev/null +++ b/labs-work/ansible/playbooks/site.yml @@ -0,0 +1,6 @@ +--- +- name: Full provisioning and deployment + ansible.builtin.import_playbook: provision.yml + +- name: 
Deploy application + ansible.builtin.import_playbook: deploy.yml diff --git a/labs-work/ansible/roles/app_deploy/defaults/main.yml b/labs-work/ansible/roles/app_deploy/defaults/main.yml new file mode 100644 index 0000000000..36448edb65 --- /dev/null +++ b/labs-work/ansible/roles/app_deploy/defaults/main.yml @@ -0,0 +1,7 @@ +--- +app_host_port: 5000 +app_container_port: 5173 +app_image: "{{ dockerhub_username }}/devops-info-service" +app_image_tag: "latest" +app_container_name: "devops-info-service" +app_restart_policy: "unless-stopped" diff --git a/labs-work/ansible/roles/app_deploy/handlers/main.yml b/labs-work/ansible/roles/app_deploy/handlers/main.yml new file mode 100644 index 0000000000..36138d4320 --- /dev/null +++ b/labs-work/ansible/roles/app_deploy/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: Restart app container + community.docker.docker_container: + name: '{{ app_container_name }}' + state: started + restart: true diff --git a/labs-work/ansible/roles/app_deploy/tasks/main.yml b/labs-work/ansible/roles/app_deploy/tasks/main.yml new file mode 100644 index 0000000000..f08dba8989 --- /dev/null +++ b/labs-work/ansible/roles/app_deploy/tasks/main.yml @@ -0,0 +1,39 @@ +--- +- name: Log in to Docker Hub + community.docker.docker_login: + username: '{{ dockerhub_username }}' + password: '{{ dockerhub_password }}' + no_log: true + +- name: Pull application image + community.docker.docker_image: + name: '{{ app_image }}' + tag: '{{ app_image_tag }}' + source: pull + force_source: true + +- name: Run application container + community.docker.docker_container: + name: '{{ app_container_name }}' + image: '{{ app_image }}:{{ app_image_tag }}' + state: started + restart_policy: '{{ app_restart_policy }}' + ports: + - '{{ app_host_port }}:{{ app_container_port }}' + recreate: true + +- name: Wait for application port to be available + ansible.builtin.wait_for: + port: '{{ app_host_port }}' + delay: 5 + timeout: 30 + +- name: Health check + ansible.builtin.uri: + url: 
'http://localhost:{{ app_host_port }}/health' + return_content: true + status_code: 200 + register: health_result + retries: 3 + delay: 5 + until: health_result.status == 200 diff --git a/labs-work/ansible/roles/common/defaults/main.yml b/labs-work/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..c300a07bb6 --- /dev/null +++ b/labs-work/ansible/roles/common/defaults/main.yml @@ -0,0 +1,12 @@ +--- +common_packages: + - python3-pip + - curl + - git + - vim + - htop + - ca-certificates + - gnupg + - lsb-release + +common_timezone: "UTC" diff --git a/labs-work/ansible/roles/common/tasks/main.yml b/labs-work/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..cb1f47f1d5 --- /dev/null +++ b/labs-work/ansible/roles/common/tasks/main.yml @@ -0,0 +1,47 @@ +--- +- name: Install system packages + tags: + - packages + - common + block: + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + - name: Set timezone + community.general.timezone: + name: "{{ common_timezone }}" + rescue: + - name: Retry apt update with fix-missing + ansible.builtin.apt: + update_cache: true + register: common_apt_fix + retries: 3 + delay: 5 + until: common_apt_fix is success + + - name: Retry package installation + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + always: + - name: Log common role completion + ansible.builtin.copy: + content: "Common role completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible-common-complete.log + mode: "0644" + +- name: Manage system users + tags: + - users + - common + block: + - name: Verify primary user exists + ansible.builtin.debug: + msg: "System user '{{ ansible_user }}' is configured" diff --git a/labs-work/ansible/roles/docker/defaults/main.yml b/labs-work/ansible/roles/docker/defaults/main.yml new file mode 100644 index 
0000000000..c7d0c8f4e7 --- /dev/null +++ b/labs-work/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,19 @@ +--- +docker_user: "{{ ansible_user | default('ubuntu') }}" + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_gpg_url: "https://download.docker.com/linux/ubuntu/gpg" +docker_gpg_key_path: "/etc/apt/keyrings/docker.asc" + +docker_repo: >- + deb [arch={{ docker_arch }} signed-by={{ docker_gpg_key_path }}] + https://download.docker.com/linux/ubuntu + {{ ansible_distribution_release }} stable + +docker_arch: "{{ 'amd64' if ansible_architecture == 'x86_64' else ansible_architecture }}" diff --git a/labs-work/ansible/roles/docker/handlers/main.yml b/labs-work/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..07aa0eb290 --- /dev/null +++ b/labs-work/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/labs-work/ansible/roles/docker/tasks/main.yml b/labs-work/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..2bc352b958 --- /dev/null +++ b/labs-work/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,66 @@ +--- +- name: Install Docker + tags: + - docker_install + - docker + block: + - name: Create keyrings directory + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + + - name: Download Docker GPG key + ansible.builtin.get_url: + url: "{{ docker_gpg_url }}" + dest: "{{ docker_gpg_key_path }}" + mode: "0644" + + - name: Add Docker apt repository + ansible.builtin.apt_repository: + repo: "{{ docker_repo }}" + state: present + filename: docker + + - name: Install Docker packages + ansible.builtin.apt: + name: "{{ docker_packages }}" + state: present + update_cache: true + notify: Restart docker + rescue: + - name: Wait before retrying Docker installation + ansible.builtin.pause: + seconds: 10 + + - name: 
Retry apt update + ansible.builtin.apt: + update_cache: true + + - name: Retry Docker package installation + ansible.builtin.apt: + name: "{{ docker_packages }}" + state: present + notify: Restart docker + always: + - name: Ensure Docker service is started and enabled + ansible.builtin.service: + name: docker + state: started + enabled: true + +- name: Configure Docker + tags: + - docker_config + - docker + block: + - name: Add user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + + - name: Install python3-docker package + ansible.builtin.apt: + name: python3-docker + state: present diff --git a/labs-work/ansible/roles/monitoring/defaults/main.yml b/labs-work/ansible/roles/monitoring/defaults/main.yml new file mode 100644 index 0000000000..b1cdd49880 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/defaults/main.yml @@ -0,0 +1,28 @@ +--- +monitoring_loki_version: "3.0.0" +monitoring_promtail_version: "3.0.0" +monitoring_grafana_version: "12.3.1" + +monitoring_loki_port: 3100 +monitoring_promtail_port: 9080 +monitoring_grafana_port: 3000 +monitoring_app_python_port: 8000 +monitoring_app_go_port: 8001 + +monitoring_loki_retention: "168h" +monitoring_compose_dir: "/opt/monitoring" +monitoring_restart_policy: "unless-stopped" +monitoring_wipe: false + +monitoring_loki_memory: "256M" +monitoring_loki_cpus: "0.5" +monitoring_promtail_memory: "128M" +monitoring_promtail_cpus: "0.25" +monitoring_grafana_memory: "256M" +monitoring_grafana_cpus: "0.5" +monitoring_app_python_memory: "128M" +monitoring_app_python_cpus: "0.25" +monitoring_app_go_memory: "64M" +monitoring_app_go_cpus: "0.25" + +monitoring_grafana_admin_password: "{{ vault_grafana_admin_password | default('changeme') }}" diff --git a/labs-work/ansible/roles/monitoring/handlers/main.yml b/labs-work/ansible/roles/monitoring/handlers/main.yml new file mode 100644 index 0000000000..9ec903a521 --- /dev/null +++ 
b/labs-work/ansible/roles/monitoring/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Restart monitoring stack + community.docker.docker_compose_v2: + project_src: "{{ monitoring_compose_dir }}" + state: restarted diff --git a/labs-work/ansible/roles/monitoring/meta/main.yml b/labs-work/ansible/roles/monitoring/meta/main.yml new file mode 100644 index 0000000000..cb7d8e0460 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker diff --git a/labs-work/ansible/roles/monitoring/tasks/deploy.yml b/labs-work/ansible/roles/monitoring/tasks/deploy.yml new file mode 100644 index 0000000000..53cb73a2a7 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/tasks/deploy.yml @@ -0,0 +1,64 @@ +--- +- name: Deploy monitoring stack + tags: + - monitoring_deploy + - monitoring + block: + - name: Deploy monitoring via Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ monitoring_compose_dir }}" + state: present + pull: always + + - name: Wait for Loki port + ansible.builtin.wait_for: + port: "{{ monitoring_loki_port }}" + delay: 5 + timeout: 60 + + - name: Wait for Grafana port + ansible.builtin.wait_for: + port: "{{ monitoring_grafana_port }}" + delay: 5 + timeout: 60 + + - name: Loki health check + ansible.builtin.uri: + url: "http://localhost:{{ monitoring_loki_port }}/ready" + return_content: true + status_code: 200 + register: loki_health + retries: 5 + delay: 5 + until: loki_health.status == 200 + + - name: Grafana health check + ansible.builtin.uri: + url: "http://localhost:{{ monitoring_grafana_port }}/api/health" + return_content: true + status_code: 200 + register: grafana_health + retries: 5 + delay: 5 + until: grafana_health.status == 200 + rescue: + - name: Log deployment failure + ansible.builtin.debug: + msg: "Monitoring stack deployment failed" + + - name: Show Docker Compose logs + ansible.builtin.command: + cmd: docker compose logs --tail=50 + chdir: "{{ monitoring_compose_dir 
}}" + register: monitoring_compose_logs + changed_when: false + failed_when: false + + - name: Display compose logs + ansible.builtin.debug: + var: monitoring_compose_logs.stdout_lines + when: monitoring_compose_logs is defined + + - name: Fail with deployment error + ansible.builtin.fail: + msg: "Monitoring stack deployment failed. Check compose logs above." diff --git a/labs-work/ansible/roles/monitoring/tasks/main.yml b/labs-work/ansible/roles/monitoring/tasks/main.yml new file mode 100644 index 0000000000..0fefb2b332 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/tasks/main.yml @@ -0,0 +1,31 @@ +--- +- name: Include wipe tasks + ansible.builtin.include_tasks: + file: wipe.yml + apply: + tags: + - monitoring_wipe + tags: + - monitoring_wipe + +- name: Include setup tasks + ansible.builtin.include_tasks: + file: setup.yml + apply: + tags: + - monitoring_setup + - monitoring + tags: + - monitoring_setup + - monitoring + +- name: Include deploy tasks + ansible.builtin.include_tasks: + file: deploy.yml + apply: + tags: + - monitoring_deploy + - monitoring + tags: + - monitoring_deploy + - monitoring diff --git a/labs-work/ansible/roles/monitoring/tasks/setup.yml b/labs-work/ansible/roles/monitoring/tasks/setup.yml new file mode 100644 index 0000000000..0a00dc4b38 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/tasks/setup.yml @@ -0,0 +1,40 @@ +--- +- name: Create monitoring directories + ansible.builtin.file: + path: "{{ item }}" + state: directory + mode: "0755" + loop: + - "{{ monitoring_compose_dir }}" + - "{{ monitoring_compose_dir }}/loki" + - "{{ monitoring_compose_dir }}/promtail" + - "{{ monitoring_compose_dir }}/grafana/provisioning/datasources" + - "{{ monitoring_compose_dir }}/grafana/provisioning/dashboards" + +- name: Template Loki config + ansible.builtin.template: + src: loki-config.yml.j2 + dest: "{{ monitoring_compose_dir }}/loki/config.yml" + mode: "0644" + notify: Restart monitoring stack + +- name: Template Promtail config + 
ansible.builtin.template: + src: promtail-config.yml.j2 + dest: "{{ monitoring_compose_dir }}/promtail/config.yml" + mode: "0644" + notify: Restart monitoring stack + +- name: Template Grafana datasource + ansible.builtin.template: + src: grafana-datasource.yml.j2 + dest: "{{ monitoring_compose_dir }}/grafana/provisioning/datasources/loki.yml" + mode: "0644" + notify: Restart monitoring stack + +- name: Template docker-compose.yml + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ monitoring_compose_dir }}/docker-compose.yml" + mode: "0644" + notify: Restart monitoring stack diff --git a/labs-work/ansible/roles/monitoring/tasks/wipe.yml b/labs-work/ansible/roles/monitoring/tasks/wipe.yml new file mode 100644 index 0000000000..6c46cba61f --- /dev/null +++ b/labs-work/ansible/roles/monitoring/tasks/wipe.yml @@ -0,0 +1,20 @@ +--- +- name: Wipe monitoring stack + when: monitoring_wipe | bool + tags: + - monitoring_wipe + block: + - name: Stop and remove monitoring via Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ monitoring_compose_dir }}" + state: absent + failed_when: false + + - name: Remove monitoring directory + ansible.builtin.file: + path: "{{ monitoring_compose_dir }}" + state: absent + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Monitoring stack has been wiped from {{ monitoring_compose_dir }}" diff --git a/labs-work/ansible/roles/monitoring/templates/docker-compose.yml.j2 b/labs-work/ansible/roles/monitoring/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..bc3d98a46a --- /dev/null +++ b/labs-work/ansible/roles/monitoring/templates/docker-compose.yml.j2 @@ -0,0 +1,118 @@ +services: + loki: + image: grafana/loki:{{ monitoring_loki_version }} + container_name: loki + command: -config.file=/etc/loki/config.yml + ports: + - "{{ monitoring_loki_port }}:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - loki + healthcheck: + test: 
["CMD-SHELL", "wget --quiet --tries=1 --output-document=- http://localhost:3100/ready || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + memory: {{ monitoring_loki_memory }} + cpus: "{{ monitoring_loki_cpus }}" + restart: {{ monitoring_restart_policy }} + + promtail: + image: grafana/promtail:{{ monitoring_promtail_version }} + container_name: promtail + command: -config.file=/etc/promtail/config.yml + ports: + - "{{ monitoring_promtail_port }}:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + networks: + - loki + depends_on: + loki: + condition: service_healthy + deploy: + resources: + limits: + memory: {{ monitoring_promtail_memory }} + cpus: "{{ monitoring_promtail_cpus }}" + restart: {{ monitoring_restart_policy }} + + grafana: + image: grafana/grafana:{{ monitoring_grafana_version }} + container_name: grafana + ports: + - "{{ monitoring_grafana_port }}:3000" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - grafana-data:/var/lib/grafana + networks: + - loki + environment: + - GF_SECURITY_ADMIN_USER=admin + - GF_SECURITY_ADMIN_PASSWORD={{ monitoring_grafana_admin_password }} + - GF_AUTH_ANONYMOUS_ENABLED=false + depends_on: + loki: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget --quiet --tries=1 --output-document=- http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + memory: {{ monitoring_grafana_memory }} + cpus: "{{ monitoring_grafana_cpus }}" + restart: {{ monitoring_restart_policy }} + + app-python: + image: {{ dockerhub_username | default('mashfeii') }}/devops-info-service:latest + container_name: app-python + ports: + - "{{ monitoring_app_python_port }}:{{ monitoring_app_python_port }}" + environment: + - PORT={{ monitoring_app_python_port }} + networks: + - loki + labels: 
+ logging: "promtail" + app: "devops-python" + deploy: + resources: + limits: + memory: {{ monitoring_app_python_memory }} + cpus: "{{ monitoring_app_python_cpus }}" + restart: {{ monitoring_restart_policy }} + + app-go: + image: {{ dockerhub_username | default('mashfeii') }}/devops-info-service-go:latest + container_name: app-go + ports: + - "{{ monitoring_app_go_port }}:8080" + networks: + - loki + labels: + logging: "promtail" + app: "devops-go" + deploy: + resources: + limits: + memory: {{ monitoring_app_go_memory }} + cpus: "{{ monitoring_app_go_cpus }}" + restart: {{ monitoring_restart_policy }} + +volumes: + loki-data: + grafana-data: + +networks: + loki: + driver: bridge diff --git a/labs-work/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 b/labs-work/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 new file mode 100644 index 0000000000..050b3c4ac8 --- /dev/null +++ b/labs-work/ansible/roles/monitoring/templates/grafana-datasource.yml.j2 @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: http://loki:3100 + isDefault: true + editable: false diff --git a/labs-work/ansible/roles/monitoring/templates/loki-config.yml.j2 b/labs-work/ansible/roles/monitoring/templates/loki-config.yml.j2 new file mode 100644 index 0000000000..0c8904095b --- /dev/null +++ b/labs-work/ansible/roles/monitoring/templates/loki-config.yml.j2 @@ -0,0 +1,42 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: "2024-01-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: /loki/chunks + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + +ingester: + wal: + dir: /loki/wal + +limits_config: + retention_period: {{ monitoring_loki_retention }} + max_query_series: 100000 + +compactor: + 
working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + delete_request_store: filesystem diff --git a/labs-work/ansible/roles/monitoring/templates/promtail-config.yml.j2 b/labs-work/ansible/roles/monitoring/templates/promtail-config.yml.j2 new file mode 100644 index 0000000000..d45d55080a --- /dev/null +++ b/labs-work/ansible/roles/monitoring/templates/promtail-config.yml.j2 @@ -0,0 +1,23 @@ +server: + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ["__meta_docker_container_name"] + regex: "/(.*)" + target_label: "container" + - source_labels: ["__meta_docker_container_label_app"] + target_label: "app" diff --git a/labs-work/ansible/roles/web_app/defaults/main.yml b/labs-work/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..af5a3bf624 --- /dev/null +++ b/labs-work/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,9 @@ +--- +web_app_name: "devops-info-service" +web_app_image: "{{ dockerhub_username }}/devops-info-service" +web_app_tag: "latest" +web_app_port: 5000 +web_app_internal_port: 5173 +web_app_compose_dir: "/opt/{{ web_app_name }}" +web_app_restart_policy: "unless-stopped" +web_app_wipe: false diff --git a/labs-work/ansible/roles/web_app/handlers/main.yml b/labs-work/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..2fa77c3246 --- /dev/null +++ b/labs-work/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: Restart application via Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_dir }}" + state: restarted diff --git a/labs-work/ansible/roles/web_app/meta/main.yml 
b/labs-work/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..cb7d8e0460 --- /dev/null +++ b/labs-work/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,3 @@ +--- +dependencies: + - role: docker diff --git a/labs-work/ansible/roles/web_app/tasks/main.yml b/labs-work/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..83d769735e --- /dev/null +++ b/labs-work/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,78 @@ +--- +- name: Include wipe tasks + ansible.builtin.include_tasks: + file: wipe.yml + apply: + tags: + - web_app_wipe + tags: + - web_app_wipe + +- name: Log in to Docker Hub + community.docker.docker_login: + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + tags: + - app_deploy + - compose + +- name: Deploy application with Docker Compose + tags: + - app_deploy + - compose + block: + - name: Create application directory + ansible.builtin.file: + path: "{{ web_app_compose_dir }}" + state: directory + mode: "0755" + + - name: Template docker-compose.yml + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ web_app_compose_dir }}/docker-compose.yml" + mode: "0644" + + - name: Deploy application via Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_dir }}" + state: present + pull: always + + - name: Wait for application port to be available + ansible.builtin.wait_for: + port: "{{ web_app_port }}" + delay: 5 + timeout: 30 + + - name: Health check + ansible.builtin.uri: + url: "http://localhost:{{ web_app_port }}/health" + return_content: true + status_code: 200 + register: web_app_health_result + retries: 3 + delay: 5 + until: web_app_health_result.status == 200 + rescue: + - name: Log deployment failure + ansible.builtin.debug: + msg: "Deployment failed for {{ web_app_name }}" + + - name: Show Docker Compose logs + ansible.builtin.command: + cmd: docker compose logs --tail=50 + chdir: "{{ web_app_compose_dir }}" + 
register: web_app_compose_logs + changed_when: false + failed_when: false + + - name: Display compose logs + ansible.builtin.debug: + var: web_app_compose_logs.stdout_lines + when: web_app_compose_logs is defined + + - name: Fail with deployment error + ansible.builtin.fail: + msg: "Application deployment failed. Check compose logs above." diff --git a/labs-work/ansible/roles/web_app/tasks/wipe.yml b/labs-work/ansible/roles/web_app/tasks/wipe.yml new file mode 100644 index 0000000000..870f9106f7 --- /dev/null +++ b/labs-work/ansible/roles/web_app/tasks/wipe.yml @@ -0,0 +1,25 @@ +--- +- name: Wipe application + when: web_app_wipe | bool + tags: + - web_app_wipe + block: + - name: Stop and remove application via Docker Compose + community.docker.docker_compose_v2: + project_src: "{{ web_app_compose_dir }}" + state: absent + failed_when: false + + - name: Remove docker-compose.yml + ansible.builtin.file: + path: "{{ web_app_compose_dir }}/docker-compose.yml" + state: absent + + - name: Remove application directory + ansible.builtin.file: + path: "{{ web_app_compose_dir }}" + state: absent + + - name: Log wipe completion + ansible.builtin.debug: + msg: "Application {{ web_app_name }} has been wiped from {{ web_app_compose_dir }}" diff --git a/labs-work/ansible/roles/web_app/templates/docker-compose.yml.j2 b/labs-work/ansible/roles/web_app/templates/docker-compose.yml.j2 new file mode 100644 index 0000000000..a8e1c66eb3 --- /dev/null +++ b/labs-work/ansible/roles/web_app/templates/docker-compose.yml.j2 @@ -0,0 +1,9 @@ +services: + {{ web_app_name }}: + image: {{ web_app_image }}:{{ web_app_tag }} + container_name: {{ web_app_name }} + restart: {{ web_app_restart_policy }} + ports: + - "{{ web_app_port }}:{{ web_app_internal_port }}" + labels: + managed-by: ansible diff --git a/labs-work/ansible/vars/app_bonus.yml b/labs-work/ansible/vars/app_bonus.yml new file mode 100644 index 0000000000..94eb51caf7 --- /dev/null +++ b/labs-work/ansible/vars/app_bonus.yml @@ -0,0 +1,7 
@@ +--- +web_app_name: "devops-info-service-go" +web_app_image: "{{ dockerhub_username }}/devops-info-service-go" +web_app_tag: "latest" +web_app_port: 8001 +web_app_internal_port: 8080 +web_app_compose_dir: "/opt/devops-info-service-go" diff --git a/labs-work/ansible/vars/app_python.yml b/labs-work/ansible/vars/app_python.yml new file mode 100644 index 0000000000..868d0df7df --- /dev/null +++ b/labs-work/ansible/vars/app_python.yml @@ -0,0 +1,7 @@ +--- +web_app_name: "devops-info-service" +web_app_image: "{{ dockerhub_username }}/devops-info-service" +web_app_tag: "latest" +web_app_port: 5000 +web_app_internal_port: 5173 +web_app_compose_dir: "/opt/devops-info-service" diff --git a/labs-work/app_go/.dockerignore b/labs-work/app_go/.dockerignore new file mode 100644 index 0000000000..59fc7e65e4 --- /dev/null +++ b/labs-work/app_go/.dockerignore @@ -0,0 +1,11 @@ +.git/ +.gitignore +.vscode/ +.idea/ +*.swp +*.swo +.DS_Store +Thumbs.db +docs/ +README.md +devops-info-service diff --git a/labs-work/app_go/.gitignore b/labs-work/app_go/.gitignore new file mode 100644 index 0000000000..b288c343b0 --- /dev/null +++ b/labs-work/app_go/.gitignore @@ -0,0 +1,11 @@ +devops-info-service +*.exe + +.vscode/ +.idea/ + +.DS_Store + +# Test coverage +coverage.out +coverage.html diff --git a/labs-work/app_go/Dockerfile b/labs-work/app_go/Dockerfile new file mode 100644 index 0000000000..1f3de0e975 --- /dev/null +++ b/labs-work/app_go/Dockerfile @@ -0,0 +1,18 @@ +FROM golang:1.21-alpine AS builder + +WORKDIR /build + +COPY go.mod . +COPY main.go . + +RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o devops-info-service . + +FROM scratch + +WORKDIR /app + +COPY --from=builder /build/devops-info-service . 
+ +EXPOSE 8080 + +ENTRYPOINT ["/app/devops-info-service"] diff --git a/labs-work/app_go/README.md b/labs-work/app_go/README.md new file mode 100644 index 0000000000..53b64ede27 --- /dev/null +++ b/labs-work/app_go/README.md @@ -0,0 +1,168 @@ +![Go CI](https://github.com/mashfeii/DevOps-Core-Course/actions/workflows/go-ci.yml/badge.svg) +![Coverage](https://codecov.io/gh/mashfeii/DevOps-Core-Course/branch/master/graph/badge.svg?flag=go) + +# devops info service (go) + +a go web service that provides detailed information about itself and its runtime environment + +## overview + +this service exposes two endpoints that return json data about the system, service metadata, and health status + +## prerequisites + +- go 1.21 or higher + +## building + +```bash +# build the binary +go build -o devops-info-service main.go + +# or run directly +go run main.go +``` + +## running the application + +```bash +# default configuration (0.0.0.0:8080) +go run main.go + +# or with binary +./devops-info-service + +# custom port +PORT=3000 go run main.go + +# custom host and port +HOST=127.0.0.1 PORT=3000 go run main.go +``` + +## api endpoints + +### get / + +returns comprehensive service and system information + +**response example:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "net/http" + }, + "system": { + "hostname": "my-laptop", + "platform": "darwin", + "architecture": "arm64", + "cpu_count": 8, + "go_version": "go1.21.0" + }, + "runtime": { + "uptime_seconds": 3600, + "uptime_human": "1 hours, 0 minutes", + "current_time": "2026-01-27T14:30:00Z", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1:52345", + "user_agent": "curl/8.1.2", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +### get /health + 
+returns service health status for monitoring + +**response example:** +```json +{ + "status": "healthy", + "timestamp": "2026-01-27T14:30:00Z", + "uptime_seconds": 3600 +} +``` + +## configuration + +| variable | default | description | +|----------|---------|-------------| +| HOST | 0.0.0.0 | server bind address | +| PORT | 8080 | server port | + +## testing + +### unit tests + +```bash +# run tests +go test -v + +# run tests with coverage +go test -coverprofile=coverage.out +go tool cover -func=coverage.out + +# generate html coverage report +go tool cover -html=coverage.out -o coverage.html +``` + +### what's tested + +- `GET /` endpoint: response structure, service info, system info, runtime info +- `GET /health` endpoint: status code, health status, uptime +- 404 handler: error response format +- helper functions: uptime calculation + +### manual testing + +```bash +# test main endpoint +curl http://localhost:8080/ + +# test health endpoint +curl http://localhost:8080/health + +# pretty print json output +curl http://localhost:8080/ | python -m json.tool +``` + +## binary size comparison + +the compiled go binary is significantly smaller than a python application with its dependencies: + +| implementation | size | +|----------------|------| +| go binary | ~6-8 mb | +| python + flask | ~50+ mb (with venv) | + +## docker + +### building the image + +```bash +docker build -t devops-info-service-go . 
+``` + +### running the container + +```bash +docker run -p 8080:8080 devops-info-service-go +``` + +### multi-stage build + +the dockerfile uses multi-stage build: +- stage 1 (builder): compiles the binary using golang:1.21-alpine +- stage 2 (runtime): copies only the binary to scratch image + +this results in a final image of ~5-8 mb instead of ~300+ mb diff --git a/labs-work/app_go/docs/GO.md b/labs-work/app_go/docs/GO.md new file mode 100644 index 0000000000..266806c5cf --- /dev/null +++ b/labs-work/app_go/docs/GO.md @@ -0,0 +1,61 @@ +# go language justification + +## why go for the bonus task + +### compiled language benefits + +| criteria | go | python | +|----------|-----|--------| +| compilation | produces single binary | interpreted, requires runtime | +| deployment | copy one file | install python + pip + dependencies | +| startup time | milliseconds | seconds | +| memory usage | lower | higher | +| binary size | ~6-8 mb | n/a (needs interpreter) | + +### go specific advantages + +**1 simple concurrency model** +- goroutines for handling multiple requests +- built into the language, not a library + +**2 standard library http server** +- no external dependencies needed +- production-ready out of the box + +**3 cross-compilation** +- build for any platform from any platform +- `GOOS=linux GOARCH=amd64 go build` + +**4 docker optimization** +- multi-stage builds produce tiny images +- scratch or alpine base images possible +- typical final image: 10-20 mb vs 100+ mb for python + +### comparison with other compiled languages + +| language | learning curve | build time | binary size | ecosystem | +|----------|----------------|------------|-------------|-----------| +| go | gentle | fast | small | mature | +| rust | steep | slow | smaller | growing | +| java | moderate | slow | large | enterprise | +| c# | moderate | moderate | medium | enterprise | + +### why go over rust for this lab + +- simpler syntax, faster to learn +- faster compilation times +- 
excellent for web services and devops tooling +- kubernetes, docker, terraform all written in go + +## implementation notes + +the go implementation uses only standard library packages: +- `encoding/json` for json serialization +- `net/http` for the http server +- `os` for environment variables and hostname +- `runtime` for system information +- `time` for timestamps and uptime +- `fmt` for string formatting +- `log` for logging + +no external dependencies required diff --git a/labs-work/app_go/docs/LAB01.md b/labs-work/app_go/docs/LAB01.md new file mode 100644 index 0000000000..64648a5f31 --- /dev/null +++ b/labs-work/app_go/docs/LAB01.md @@ -0,0 +1,114 @@ +# lab 01 submission (go bonus) + +## implementation overview + +this is the bonus go implementation of the devops info service, providing the same functionality as the python version with identical json response structures + +## endpoints implemented + +### get / + +returns comprehensive service and system information including: +- service metadata (name, version, description, framework) +- system info (hostname, platform, architecture, cpu count, go version) +- runtime info (uptime, current time, timezone) +- request info (client ip, user agent, method, path) +- available endpoints list + +### get /health + +returns health check information: +- status: healthy +- timestamp in iso format +- uptime in seconds + +## code structure + +```go +// struct definitions for json responses +type ServiceInfo struct { ... } +type SystemInfo struct { ... } +type RuntimeInfo struct { ... } +type RequestInfo struct { ... } +type MainResponse struct { ... } +type HealthResponse struct { ... } +type ErrorResponse struct { ... } + +// helper function for uptime calculation +func getUptime() (int, string) { ... } + +// http handlers +func mainHandler(w http.ResponseWriter, r *http.Request) { ... } +func healthHandler(w http.ResponseWriter, r *http.Request) { ... } +func notFoundHandler(w http.ResponseWriter, r *http.Request) { ... 
} + +// application entry point +func main() { ... } +``` + +## features + +### environment variable configuration + +```go +port := os.Getenv("PORT") +if port == "" { + port = "8080" +} + +host := os.Getenv("HOST") +if host == "" { + host = "0.0.0.0" +} +``` + +### error handling + +custom 404 handler returns json error response: + +```go +func notFoundHandler(w http.ResponseWriter, r *http.Request) { + response := ErrorResponse{ + Error: "Not Found", + Message: "The requested endpoint does not exist", + Path: r.URL.Path, + } + w.WriteHeader(http.StatusNotFound) + json.NewEncoder(w).Encode(response) +} +``` + +### logging + +request logging using standard log package: + +```go +log.Printf("Request received: %s %s", r.Method, r.URL.Path) +``` + +## testing evidence + +![[fullscreen with two terminals: left running go app on port 8080, right showing curl responses for main and health endpoints]](screenshots/go-implementation.png) + +## build and run + +```bash +# build +go build -o devops-info-service main.go + +# run +./devops-info-service + +# or directly +go run main.go +``` + +## differences from python version + +| aspect | python | go | +|--------|--------|-----| +| framework field | Flask | net/http | +| python_version | included | replaced with go_version | +| default port | 5000 | 8080 | +| timestamp format | isoformat with microseconds | rfc3339 | +| client_ip | ip only | ip:port | diff --git a/labs-work/app_go/docs/LAB02.md b/labs-work/app_go/docs/LAB02.md new file mode 100644 index 0000000000..16a265dbd1 --- /dev/null +++ b/labs-work/app_go/docs/LAB02.md @@ -0,0 +1,86 @@ +# Lab 02 - Docker Multi-Stage Build (Bonus) + +## Multi-stage build strategy + +### builder stage + +- uses golang:1.21-alpine as base (~300mb) +- contains full go toolchain for compilation +- compiles application with static linking +- produces single binary file + +### runtime stage + +- uses scratch (empty image, 0 bytes base) +- contains only the compiled binary +- no shell, no 
package manager, no extra files +- minimal attack surface + +### why scratch works for go + +- go can produce fully static binaries with CGO_ENABLED=0 +- no runtime dependencies needed (unlike python or java) +- all libraries compiled into single executable +- binary is self-contained and portable + +## Size comparison + +### image sizes + +![[image sizes]](screenshots/image_sizes.png) + +### size reduction analysis + +- builder image: 221 mb (includes go compiler, tools, libraries) +- final image: 6.52 mb (only compiled binary) +- reduction: ~97% smaller than builder +- reason: discarded compiler, source code, build tools after compilation + +## Technical explanation + +### CGO_ENABLED=0 + +- disables c go interface +- produces pure go binary without c dependencies +- required for scratch base image (no libc available) +- ensures binary works without any system libraries + +### ldflags stripping + +- -s removes symbol table +- -w removes dwarf debugging information +- reduces binary size by ~30% +- no impact on runtime functionality + +### static compilation benefits + +- single file deployment +- no dependency resolution at runtime +- works on any linux system +- compatible with minimal base images + +## Security benefits + +### minimal attack surface + +- no shell means no shell injection possible +- no package manager means no supply chain attacks via container +- no extra utilities means fewer potential vulnerabilities +- only your code runs in the container + +### no shell in scratch + +- cannot exec into container with shell +- attackers cannot install tools if they gain access +- forces immutable infrastructure pattern +- debugging requires different approaches (logging, metrics) + +## Build and run process + +### build output + +![[multi-stage docker build]](screenshots/go_build.png) + +### endpoint testing + +![[curl tests]](screenshots/run_curl_go.png) diff --git a/labs-work/app_go/docs/LAB03.md b/labs-work/app_go/docs/LAB03.md new file mode 100644 index 
0000000000..36a92831dc --- /dev/null +++ b/labs-work/app_go/docs/LAB03.md @@ -0,0 +1,147 @@ +# Lab 03 - CI/CD with GitHub Actions (Go Bonus) + +## Overview + +This document covers the Go CI workflow implementation as part of the Lab 03 bonus task for multi-app CI with path filters. + +### Testing Framework: Go Standard Library + +**Why Go's built-in testing:** + +- **Zero dependencies**: Part of Go standard library (`testing` package) +- **Convention-based**: Files ending in `_test.go` are automatically tests +- **Built-in coverage**: `go test -cover` works out of the box +- **Fast execution**: Compiled tests run extremely fast + +### Endpoints Tested + +| Endpoint | Tests Count | What's Validated | +| ------------- | ----------- | ---------------------------------------------------------------------- | +| `GET /` | 6 tests | Status code, JSON content-type, service/system/runtime info, endpoints | +| `GET /health` | 4 tests | Status code, JSON format, health status, uptime | +| 404 handler | 3 tests | Status code, error format, path inclusion | +| Helper | 1 test | getUptime() returns valid values | + +**Total: 14 tests** covering all handlers and helper functions. + +### Workflow Triggers + +```yaml +on: + push: + branches: [main, master] + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' + pull_request: + branches: [main, master] + paths: + - 'app_go/**' + - '.github/workflows/go-ci.yml' +``` + +**Path filters ensure:** + +- Go CI only runs when Go code changes +- Python CI only runs when Python code changes +- Both can run in parallel if both change + +--- + +## Workflow Evidence + +### 1. Go CI Workflow Run + +![go-workflow-run](screenshots/go-workflow-run.png) + +--- + +### 2. Go Tests Passing Locally + +![go-tests-passing](screenshots/go-tests-passing.png) + +--- + +### 3. 
Go Coverage Report + +![go-coverage](screenshots/go-coverage.png) + +**Coverage breakdown:** + +- `getUptime`: 100% +- `mainHandler`: 100% +- `healthHandler`: 100% +- `notFoundHandler`: 100% +- `main`: 0% (entry point, expected) +- **Total: ~68%** + +--- + +### 4. Path Filters Working + +I tried many times to refactor the pipeline to validate the path filters, but none of the attempts was successful :( + +--- + +### 5. Docker Hub Go Images + +![docker-hub-go](screenshots/docker-hub-go.png) + +**Docker Hub URL:** https://hub.docker.com/r/mashfeii/devops-info-service-go + +--- + +### 6. Caching Performance + +![cache-miss](screenshots/cache-miss.png) +![cache-hit](screenshots/cache-hit.png) + +**Metrics:** + +- Without cache (first run): 41 seconds +- With cache (subsequent): 29 seconds +- **Time saved:** 12 seconds (~30% improvement) + +--- + +## CI Workflow Comparison + +| Aspect | Python CI | Go CI | +| -------------------- | ---------------------------- | ------------------------------- | +| **Language Setup** | actions/setup-python@v5 | actions/setup-go@v5 | +| **Linting** | ruff check | golangci-lint-action | +| **Testing** | pytest | go test | +| **Coverage Tool** | pytest-cov | go test -coverprofile | +| **Coverage Upload** | codecov-action (xml) | codecov-action (out) | +| **Docker Image** | mashfeii/devops-info-service | mashfeii/devops-info-service-go | +| **Final Image Size** | ~150MB (python:3.13-slim) | ~6MB (scratch) | + +--- + +## Benefits of Path-Based Triggers + +1. **Resource efficiency**: Only relevant CI runs, saving compute time +2. **Faster feedback**: Don't wait for unrelated tests +3. **Clear ownership**: Each app has its own CI configuration +4. **Independent deployment**: Can deploy Python without touching Go + +--- + +## Challenges + +### Challenge: golangci-lint Configuration + +**Problem:** golangci-lint flagged some style issues in auto-generated code. + +**Solution:** Either fix the issues or configure `.golangci.yml` to exclude specific checks. 
+ +### Challenge: Coverage File Format + +**Problem:** Go coverage output is `.out` format, not `.xml`. + +**Solution:** Codecov supports Go coverage format natively: + +```yaml +files: app_go/coverage.out +flags: go +``` diff --git a/labs-work/app_go/docs/screenshots/cache-hit.png b/labs-work/app_go/docs/screenshots/cache-hit.png new file mode 100644 index 0000000000..7b04613d82 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/cache-hit.png differ diff --git a/labs-work/app_go/docs/screenshots/cache-miss.png b/labs-work/app_go/docs/screenshots/cache-miss.png new file mode 100644 index 0000000000..bae2da9eb9 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/cache-miss.png differ diff --git a/labs-work/app_go/docs/screenshots/docker-hub-go.png b/labs-work/app_go/docs/screenshots/docker-hub-go.png new file mode 100644 index 0000000000..02cf3c9d79 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/docker-hub-go.png differ diff --git a/labs-work/app_go/docs/screenshots/docker_hub.png b/labs-work/app_go/docs/screenshots/docker_hub.png new file mode 100644 index 0000000000..e7d9d8db7f Binary files /dev/null and b/labs-work/app_go/docs/screenshots/docker_hub.png differ diff --git a/labs-work/app_go/docs/screenshots/docker_push.png b/labs-work/app_go/docs/screenshots/docker_push.png new file mode 100644 index 0000000000..298e3c8fb5 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/docker_push.png differ diff --git a/labs-work/app_go/docs/screenshots/go-coverage.png b/labs-work/app_go/docs/screenshots/go-coverage.png new file mode 100644 index 0000000000..edca441ad8 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/go-coverage.png differ diff --git a/labs-work/app_go/docs/screenshots/go-implementation.png b/labs-work/app_go/docs/screenshots/go-implementation.png new file mode 100644 index 0000000000..a0f3729cf5 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/go-implementation.png differ diff 
--git a/labs-work/app_go/docs/screenshots/go-tests-passing.png b/labs-work/app_go/docs/screenshots/go-tests-passing.png new file mode 100644 index 0000000000..bc07287ebb Binary files /dev/null and b/labs-work/app_go/docs/screenshots/go-tests-passing.png differ diff --git a/labs-work/app_go/docs/screenshots/go-workflow-run.png b/labs-work/app_go/docs/screenshots/go-workflow-run.png new file mode 100644 index 0000000000..9dd9797d2a Binary files /dev/null and b/labs-work/app_go/docs/screenshots/go-workflow-run.png differ diff --git a/labs-work/app_go/docs/screenshots/go_build.png b/labs-work/app_go/docs/screenshots/go_build.png new file mode 100644 index 0000000000..8c6943e3b3 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/go_build.png differ diff --git a/labs-work/app_go/docs/screenshots/image_sizes.png b/labs-work/app_go/docs/screenshots/image_sizes.png new file mode 100644 index 0000000000..defb93c5d5 Binary files /dev/null and b/labs-work/app_go/docs/screenshots/image_sizes.png differ diff --git a/labs-work/app_go/docs/screenshots/run_curl_go.png b/labs-work/app_go/docs/screenshots/run_curl_go.png new file mode 100644 index 0000000000..5b2b947caf Binary files /dev/null and b/labs-work/app_go/docs/screenshots/run_curl_go.png differ diff --git a/labs-work/app_go/go.mod b/labs-work/app_go/go.mod new file mode 100644 index 0000000000..307ce0d1c5 --- /dev/null +++ b/labs-work/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.21 diff --git a/labs-work/app_go/main.go b/labs-work/app_go/main.go new file mode 100644 index 0000000000..099b080c16 --- /dev/null +++ b/labs-work/app_go/main.go @@ -0,0 +1,180 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net/http" + "os" + "runtime" + "time" +) + +var startTime = time.Now() + +type ServiceInfo struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type SystemInfo struct { + 
Hostname string `json:"hostname"` + Platform string `json:"platform"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + GoVersion string `json:"go_version"` +} + +type RuntimeInfo struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type RequestInfo struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type MainResponse struct { + Service ServiceInfo `json:"service"` + System SystemInfo `json:"system"` + Runtime RuntimeInfo `json:"runtime"` + Request RequestInfo `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +type HealthResponse struct { + Status string `json:"status"` + Timestamp string `json:"timestamp"` + UptimeSeconds int `json:"uptime_seconds"` +} + +type ErrorResponse struct { + Error string `json:"error"` + Message string `json:"message"` + Path string `json:"path,omitempty"` +} + +func getUptime() (int, string) { + seconds := int(time.Since(startTime).Seconds()) + hours := seconds / 3600 + minutes := (seconds % 3600) / 60 + return seconds, fmt.Sprintf("%d hours, %d minutes", hours, minutes) +} + +func mainHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + notFoundHandler(w, r) + return + } + + hostname, _ := os.Hostname() + uptimeSec, uptimeHuman := getUptime() + + response := MainResponse{ + Service: ServiceInfo{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "net/http", + }, + System: SystemInfo{ + Hostname: hostname, + Platform: runtime.GOOS, + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + GoVersion: runtime.Version(), + }, + Runtime: RuntimeInfo{ + 
UptimeSeconds: uptimeSec, + UptimeHuman: uptimeHuman, + CurrentTime: time.Now().UTC().Format(time.RFC3339), + Timezone: "UTC", + }, + Request: RequestInfo{ + ClientIP: r.RemoteAddr, + UserAgent: r.UserAgent(), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + + log.Printf("Request received: %s %s", r.Method, r.URL.Path) + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + log.Printf("Failed to encode response: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + } +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + uptimeSec, _ := getUptime() + + response := HealthResponse{ + Status: "healthy", + Timestamp: time.Now().UTC().Format(time.RFC3339), + UptimeSeconds: uptimeSec, + } + + log.Printf("Health check requested") + + w.Header().Set("Content-Type", "application/json") + if err := json.NewEncoder(w).Encode(response); err != nil { + log.Printf("Failed to encode response: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + } +} + +func notFoundHandler(w http.ResponseWriter, r *http.Request) { + log.Printf("404 error: %s", r.URL.Path) + + response := ErrorResponse{ + Error: "Not Found", + Message: "The requested endpoint does not exist", + Path: r.URL.Path, + } + + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(http.StatusNotFound) + if err := json.NewEncoder(w).Encode(response); err != nil { + log.Printf("Failed to encode response: %v", err) + http.Error(w, "Internal Server Error", http.StatusInternalServerError) + } +} + +// Program entry point +func main() { + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + host := os.Getenv("HOST") + if host == "" { + host = "0.0.0.0" + } + + http.HandleFunc("/", mainHandler) + 
http.HandleFunc("/health", healthHandler) + + addr := fmt.Sprintf("%s:%s", host, port) + log.Printf("Starting DevOps Info Service on %s", addr) + log.Fatal(http.ListenAndServe(addr, nil)) +} diff --git a/labs-work/app_go/main_test.go b/labs-work/app_go/main_test.go new file mode 100644 index 0000000000..a3c1a481fd --- /dev/null +++ b/labs-work/app_go/main_test.go @@ -0,0 +1,269 @@ +package main + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestMainHandler_ReturnsOK(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + if status := rr.Code; status != http.StatusOK { + t.Errorf("mainHandler returned wrong status code: got %v want %v", status, http.StatusOK) + } +} + +func TestMainHandler_ReturnsJSON(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + contentType := rr.Header().Get("Content-Type") + if contentType != "application/json" { + t.Errorf("mainHandler returned wrong content type: got %v want %v", contentType, "application/json") + } +} + +func TestMainHandler_ContainsServiceInfo(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + var response MainResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.Service.Name != "devops-info-service" { + t.Errorf("unexpected service name: got %v want %v", response.Service.Name, "devops-info-service") + } + + if response.Service.Framework != "net/http" { + t.Errorf("unexpected framework: got %v want %v", 
response.Service.Framework, "net/http") + } +} + +func TestMainHandler_ContainsSystemInfo(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + var response MainResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.System.Hostname == "" { + t.Error("system hostname should not be empty") + } + + if response.System.CPUCount <= 0 { + t.Errorf("cpu count should be positive: got %v", response.System.CPUCount) + } +} + +func TestMainHandler_ContainsRuntimeInfo(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + var response MainResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.Runtime.Timezone != "UTC" { + t.Errorf("unexpected timezone: got %v want %v", response.Runtime.Timezone, "UTC") + } + + if response.Runtime.UptimeSeconds < 0 { + t.Errorf("uptime should be non-negative: got %v", response.Runtime.UptimeSeconds) + } +} + +func TestMainHandler_ContainsEndpoints(t *testing.T) { + req, err := http.NewRequest("GET", "/", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + var response MainResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if len(response.Endpoints) < 2 { + t.Errorf("expected at least 2 endpoints, got %v", len(response.Endpoints)) + } +} + +func TestHealthHandler_ReturnsOK(t *testing.T) { + req, err := http.NewRequest("GET", "/health", nil) + if err != nil { + 
t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(healthHandler) + handler.ServeHTTP(rr, req) + + if status := rr.Code; status != http.StatusOK { + t.Errorf("healthHandler returned wrong status code: got %v want %v", status, http.StatusOK) + } +} + +func TestHealthHandler_ReturnsJSON(t *testing.T) { + req, err := http.NewRequest("GET", "/health", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(healthHandler) + handler.ServeHTTP(rr, req) + + contentType := rr.Header().Get("Content-Type") + if contentType != "application/json" { + t.Errorf("healthHandler returned wrong content type: got %v want %v", contentType, "application/json") + } +} + +func TestHealthHandler_StatusIsHealthy(t *testing.T) { + req, err := http.NewRequest("GET", "/health", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(healthHandler) + handler.ServeHTTP(rr, req) + + var response HealthResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.Status != "healthy" { + t.Errorf("unexpected health status: got %v want %v", response.Status, "healthy") + } +} + +func TestHealthHandler_UptimeIsNonNegative(t *testing.T) { + req, err := http.NewRequest("GET", "/health", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(healthHandler) + handler.ServeHTTP(rr, req) + + var response HealthResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.UptimeSeconds < 0 { + t.Errorf("uptime should be non-negative: got %v", response.UptimeSeconds) + } +} + +func TestMainHandler_Returns404ForInvalidPath(t *testing.T) { + req, err := http.NewRequest("GET", "/nonexistent", nil) + if err != nil { + t.Fatal(err) + } + + rr := 
httptest.NewRecorder() + handler := http.HandlerFunc(mainHandler) + handler.ServeHTTP(rr, req) + + if status := rr.Code; status != http.StatusNotFound { + t.Errorf("mainHandler returned wrong status code for invalid path: got %v want %v", status, http.StatusNotFound) + } +} + +func TestNotFoundHandler_ReturnsJSON(t *testing.T) { + req, err := http.NewRequest("GET", "/nonexistent", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(notFoundHandler) + handler.ServeHTTP(rr, req) + + contentType := rr.Header().Get("Content-Type") + if contentType != "application/json" { + t.Errorf("notFoundHandler returned wrong content type: got %v want %v", contentType, "application/json") + } +} + +func TestNotFoundHandler_ContainsErrorInfo(t *testing.T) { + req, err := http.NewRequest("GET", "/nonexistent", nil) + if err != nil { + t.Fatal(err) + } + + rr := httptest.NewRecorder() + handler := http.HandlerFunc(notFoundHandler) + handler.ServeHTTP(rr, req) + + var response ErrorResponse + if err := json.NewDecoder(rr.Body).Decode(&response); err != nil { + t.Errorf("failed to decode response body: %v", err) + } + + if response.Error != "Not Found" { + t.Errorf("unexpected error message: got %v want %v", response.Error, "Not Found") + } + + if response.Path != "/nonexistent" { + t.Errorf("unexpected path in error response: got %v want %v", response.Path, "/nonexistent") + } +} + +func TestGetUptime_ReturnsNonNegativeSeconds(t *testing.T) { + seconds, human := getUptime() + + if seconds < 0 { + t.Errorf("uptime seconds should be non-negative: got %v", seconds) + } + + if human == "" { + t.Error("uptime human string should not be empty") + } +} diff --git a/labs-work/app_python/.dockerignore b/labs-work/app_python/.dockerignore new file mode 100644 index 0000000000..88361adbd0 --- /dev/null +++ b/labs-work/app_python/.dockerignore @@ -0,0 +1,25 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so +venv/ +.venv/ +ENV/ +.git/ +.gitignore 
+.vscode/ +.idea/ +*.swp +*.swo +.DS_Store +Thumbs.db +*.log +.pytest_cache/ +.coverage +htmlcov/ +dist/ +build/ +*.egg-info/ +docs/ +tests/ +README.md diff --git a/labs-work/app_python/.gitignore b/labs-work/app_python/.gitignore new file mode 100644 index 0000000000..f9d11c910a --- /dev/null +++ b/labs-work/app_python/.gitignore @@ -0,0 +1,26 @@ +__pycache__/ +*.py[cod] +*$py.class +*.so + +venv/ +.venv/ +ENV/ + +.vscode/ +.idea/ +*.swp +*.swo + +.DS_Store +Thumbs.db + +*.log + +.pytest_cache/ +.coverage +htmlcov/ + +dist/ +build/ +*.egg-info/ diff --git a/labs-work/app_python/Dockerfile b/labs-work/app_python/Dockerfile new file mode 100644 index 0000000000..dc69192a26 --- /dev/null +++ b/labs-work/app_python/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +WORKDIR /app + +RUN useradd --create-home --shell /bin/bash appuser + +COPY requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY app.py . + +RUN chown -R appuser:appuser /app + +USER appuser + +EXPOSE 5173 + +CMD ["python", "app.py"] diff --git a/labs-work/app_python/README.md b/labs-work/app_python/README.md new file mode 100644 index 0000000000..c3e0dcc63b --- /dev/null +++ b/labs-work/app_python/README.md @@ -0,0 +1,178 @@ +![Python CI](https://github.com/mashfeii/DevOps-Core-Course/actions/workflows/python-ci.yml/badge.svg) +![Coverage](https://codecov.io/gh/mashfeii/DevOps-Core-Course/branch/master/graph/badge.svg) + +# devops info service + +a python web service that provides detailed information about itself and its runtime environment + +## overview + +this service exposes two endpoints that return json data about the system, service metadata, and health status + +## prerequisites + +- python 3.11 or higher +- pip package manager + +## installation + +```bash +# create virtual environment +python -m venv venv + +# activate virtual environment +source venv/bin/activate # linux/macos +# or +venv\Scripts\activate # windows + +# install dependencies +pip install -r 
requirements.txt +``` + +## running the application + +```bash +# default configuration (0.0.0.0:5173) +python app.py + +# custom port +PORT=8080 python app.py + +# custom host and port +HOST=127.0.0.1 PORT=3000 python app.py + +# enable debug mode +DEBUG=true python app.py +``` + +## api endpoints + +### get / + +returns comprehensive service and system information + +**response example:** + +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": { + "hostname": "my-laptop", + "platform": "Darwin", + "platform_version": "Darwin-24.6.0-arm64", + "architecture": "arm64", + "cpu_count": 8, + "python_version": "3.13.1" + }, + "runtime": { + "uptime_seconds": 3600, + "uptime_human": "1 hours, 0 minutes", + "current_time": "2026-01-27T14:30:00.000000+00:00", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/8.1.2", + "method": "GET", + "path": "/" + }, + "endpoints": [ + { "path": "/", "method": "GET", "description": "Service information" }, + { "path": "/health", "method": "GET", "description": "Health check" } + ] +} +``` + +### get /health + +returns service health status for monitoring + +**response example:** + +```json +{ + "status": "healthy", + "timestamp": "2026-01-27T14:30:00.000000+00:00", + "uptime_seconds": 3600 +} +``` + +## configuration + +| variable | default | description | +| -------- | ------- | ------------------- | +| HOST | 0.0.0.0 | server bind address | +| PORT | 5173 | server port | +| DEBUG | false | enable debug mode | + +## testing + +### unit tests + +```bash +# install dev dependencies +pip install -r requirements-dev.txt + +# run tests +pytest + +# run tests with verbose output +pytest -v + +# run tests with coverage +pytest --cov=.
--cov-report=term +``` + +### test structure + +``` +tests/ +├── __init__.py # test package marker +├── conftest.py # pytest fixtures (test client) +└── test_app.py # unit tests for all endpoints +``` + +### what's tested + +- `GET /` endpoint: response structure, data types, required fields +- `GET /health` endpoint: status code, response format, health status +- error handlers: 404 responses with correct format + +### manual testing + +```bash +# test main endpoint +curl http://localhost:5173/ + +# test health endpoint +curl http://localhost:5173/health + +# pretty print json output +curl http://localhost:5173/ | python -m json.tool +``` + +## docker + +### building the image + +```bash +docker build -t devops-info-service . +``` + +### running the container + +```bash +docker run -p 5173:5173 devops-info-service +``` + +### pulling from docker hub + +```bash +docker pull mashfeii/devops-info-service:latest +docker run -p 5173:5173 mashfeii/devops-info-service:latest +``` diff --git a/labs-work/app_python/app.py b/labs-work/app_python/app.py new file mode 100644 index 0000000000..2a375c4873 --- /dev/null +++ b/labs-work/app_python/app.py @@ -0,0 +1,165 @@ +import json +import logging +import os +import platform +import socket +from datetime import datetime, timezone + +from flask import Flask, jsonify, request + +app = Flask(__name__) + +HOST = os.getenv('HOST', '0.0.0.0') +PORT = int(os.getenv('PORT', 5173)) +DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' + +START_TIME = datetime.now(timezone.utc) + + +class JSONFormatter(logging.Formatter): + def format(self, record): + log_entry = { + 'timestamp': datetime.now(timezone.utc).isoformat(), + 'level': record.levelname, + 'logger': record.name, + 'message': record.getMessage(), + } + if hasattr(record, 'method'): + log_entry['method'] = record.method + if hasattr(record, 'path'): + log_entry['path'] = record.path + if hasattr(record, 'status_code'): + log_entry['status_code'] = record.status_code + if
hasattr(record, 'client_ip'): + log_entry['client_ip'] = record.client_ip + return json.dumps(log_entry) + + +handler = logging.StreamHandler() +handler.setFormatter(JSONFormatter()) +logging.root.handlers = [] +logging.root.addHandler(handler) +logging.root.setLevel(logging.DEBUG if DEBUG else logging.INFO) +logger = logging.getLogger(__name__) + +werkzeug_logger = logging.getLogger('werkzeug') +werkzeug_logger.handlers = [] +werkzeug_logger.addHandler(handler) +werkzeug_logger.propagate = False + + +def get_uptime(): + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + return { + 'seconds': seconds, + 'human': f"{hours} hours, {minutes} minutes" + } + + +def get_system_info(): + return { + 'hostname': socket.gethostname(), + 'platform': platform.system(), + 'platform_version': platform.platform(), + 'architecture': platform.machine(), + 'cpu_count': os.cpu_count(), + 'python_version': platform.python_version() + } + + +def get_service_info(): + return { + 'name': 'devops-info-service', + 'version': '1.0.0', + 'description': 'DevOps course info service', + 'framework': 'Flask' + } + + +def get_request_info(): + return { + 'client_ip': request.remote_addr, + 'user_agent': request.headers.get('User-Agent', 'Unknown'), + 'method': request.method, + 'path': request.path + } + + +def get_endpoints_list(): + return [ + {'path': '/', 'method': 'GET', 'description': 'Service information'}, + {'path': '/health', 'method': 'GET', 'description': 'Health check'} + ] + + +@app.after_request +def log_request(response): + logger.info( + "Request processed", + extra={ + 'method': request.method, + 'path': request.path, + 'status_code': response.status_code, + 'client_ip': request.remote_addr, + }, + ) + return response + + +@app.route('/') +def index(): + uptime = get_uptime() + + response = { + 'service': get_service_info(), + 'system': get_system_info(), + 'runtime': { + 
'uptime_seconds': uptime['seconds'], + 'uptime_human': uptime['human'], + 'current_time': datetime.now(timezone.utc).isoformat(), + 'timezone': 'UTC' + }, + 'request': get_request_info(), + 'endpoints': get_endpoints_list() + } + + return jsonify(response) + + +@app.route('/health') +def health(): + logger.debug("Health check requested") + + return jsonify({ + 'status': 'healthy', + 'timestamp': datetime.now(timezone.utc).isoformat(), + 'uptime_seconds': get_uptime()['seconds'] + }) + + +@app.errorhandler(404) +def not_found(error): + logger.warning(f"404 error: {request.path}") + return jsonify({ + 'error': 'Not Found', + 'message': 'The requested endpoint does not exist', + 'path': request.path + }), 404 + + +@app.errorhandler(500) +def internal_error(error): + logger.error(f"500 error: {str(error)}") + return jsonify({ + 'error': 'Internal Server Error', + 'message': 'An unexpected error occurred' + }), 500 + + +if __name__ == '__main__': + logger.info(f"Starting DevOps Info Service on {HOST}:{PORT}") + logger.info(f"Debug mode: {DEBUG}") + app.run(host=HOST, port=PORT, debug=DEBUG) diff --git a/labs-work/app_python/docs/LAB01.md b/labs-work/app_python/docs/LAB01.md new file mode 100644 index 0000000000..f030e7b7d4 --- /dev/null +++ b/labs-work/app_python/docs/LAB01.md @@ -0,0 +1,154 @@ +# lab 01 submission + +## framework selection + +### chosen framework: flask 3.1 + +i selected flask for this lab for the following reasons: + +| criteria | flask | fastapi | django | +|----------|-------|---------|--------| +| complexity | low | medium | high | +| learning curve | gentle | moderate | steep | +| setup time | minutes | minutes | longer | +| json api support | built-in jsonify | native | requires drf | +| async support | optional | native | optional | + +**decision rationale:** +- flask provides the simplest path to a working json api +- the built-in development server eliminates extra dependencies +- excellent documentation and community support +- matches the 
example code provided in lab instructions + +## best practices applied + +### 1 pep 8 compliance + +organized imports in three groups: standard library, third-party, local + +```python +import logging +import os +import platform + +from flask import Flask, jsonify, request +``` + +### 2 error handling + +implemented custom error handlers for 404 and 500 responses + +```python +@app.errorhandler(404) +def not_found(error): + return jsonify({ + 'error': 'Not Found', + 'message': 'The requested endpoint does not exist' + }), 404 +``` + +### 3 logging configuration + +configured structured logging with timestamps and log levels + +```python +logging.basicConfig( + level=logging.INFO, + format='%(asctime)s - %(name)s - %(levelname)s - %(message)s' +) +``` + +### 4 environment variables + +made all configuration externally configurable + +```python +HOST = os.getenv('HOST', '0.0.0.0') +PORT = int(os.getenv('PORT', 5000)) +DEBUG = os.getenv('DEBUG', 'False').lower() == 'true' +``` + +## api documentation + +### main endpoint + +**request:** +```bash +curl http://localhost:5000/ +``` + +**response:** +```json +{ + "service": { + "name": "devops-info-service", + "version": "1.0.0", + "description": "DevOps course info service", + "framework": "Flask" + }, + "system": { + "hostname": "macbook", + "platform": "Darwin", + "platform_version": "Darwin-24.6.0-arm64", + "architecture": "arm64", + "cpu_count": 8, + "python_version": "3.13.1" + }, + "runtime": { + "uptime_seconds": 120, + "uptime_human": "0 hours, 2 minutes", + "current_time": "2026-01-27T14:30:00.000000+00:00", + "timezone": "UTC" + }, + "request": { + "client_ip": "127.0.0.1", + "user_agent": "curl/8.1.2", + "method": "GET", + "path": "/" + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"} + ] +} +``` + +### health endpoint + +**request:** +```bash +curl http://localhost:5000/health +``` + +**response:** 
+```json +{ + "status": "healthy", + "timestamp": "2026-01-27T14:30:00.000000+00:00", + "uptime_seconds": 120 +} +``` + +## testing evidence + +![[fullscreen with two terminals: left running flask app on port 5173, right showing curl responses for main and health endpoints]](screenshots/python-implementation.png) + +## challenges and solutions + +### challenge 1: timezone handling + +**problem:** datetime objects were being serialized without timezone information + +**solution:** used `datetime.now(timezone.utc)` instead of `datetime.utcnow()` to ensure proper iso format with timezone + +### challenge 2: uptime calculation accuracy + +**problem:** needed consistent uptime across multiple endpoint calls + +**solution:** stored `START_TIME` as a module-level constant at application startup and calculated delta on each request + +## github community + +starring repositories helps signal appreciation to maintainers and bookmark useful projects for future reference + +following developers on github enables learning from their contributions and staying connected with the professional community diff --git a/labs-work/app_python/docs/LAB02.md b/labs-work/app_python/docs/LAB02.md new file mode 100644 index 0000000000..aca894f57a --- /dev/null +++ b/labs-work/app_python/docs/LAB02.md @@ -0,0 +1,107 @@ +# Lab 02 - Docker Containerization + +## Docker best practices applied + +### non-root user + +- created dedicated appuser with useradd +- switched to appuser before running application +- reduces the impact of container escape vulnerabilities and limits damage if app is compromised + +### specific base image version + +- used python:3.13-slim instead of python:latest +- ensures reproducible builds across environments +- slim variant is much smaller than the full image (~150mb vs ~1gb) + +### layer caching optimization + +- copied requirements.txt before application code +- pip install runs only when dependencies change +- application code changes don't invalidate dependency layer + +###
dockerignore usage + +- excludes venv, pycache, docs, tests from build context +- reduces build time and context size +- prevents accidental inclusion of development artifacts + +### minimal file copying + +- only app.py and requirements.txt copied to final image +- no documentation or test files in production container + +## Image information and decisions + +### base image selection + +- chose python:3.13-slim over alpine because: + - better compatibility with pip packages + - includes necessary c libraries for common dependencies + - smaller than full python image (~150mb vs ~1gb) + - more stable than alpine for python workloads + +### final image size + +![[image size]](screenshots/python_size.png) + +### layer structure + +1. base image (python:3.13-slim) +2. workdir creation +3. user creation +4. requirements copy +5. pip install +6. app copy +7. ownership change +8. user switch + +## Build and run process + +### build output + +![[image build]](screenshots/python_build.png) + +### run output and endpoint testing + +![[run output and endpoint testing]](screenshots/run_curl_python.png) + +### docker hub + +- repository url: https://hub.docker.com/r/mashfeii/devops-info-service + ![[screenshot of docker hub repositories page]](screenshots/docker_hub.png) + +## Technical analysis + +### why layer order matters + +- docker caches each layer and reuses unchanged layers +- placing rarely-changing instructions first maximizes cache hits +- requirements.txt changes less often than app.py +- rebuilding after code change only reruns COPY app.py and later layers + +### security implications of non-root + +- root user in container has root-like privileges on host in some configurations +- non-root user limits blast radius of security vulnerabilities +- follows principle of least privilege +- required for many kubernetes security policies + +### dockerignore benefits + +- smaller build context means faster builds +- prevents secrets from accidentally being included +- 
reduces attack surface by excluding unnecessary files +- keeps image focused on runtime requirements only + +## Challenges and solutions + +### challenge 1: layer caching + +- initially copied all files at once, causing full rebuild on every change +- solution: split COPY into requirements.txt first, then app.py + +### challenge 2: permissions + +- app files owned by root after COPY +- solution: added chown command before switching to appuser diff --git a/labs-work/app_python/docs/LAB03.md b/labs-work/app_python/docs/LAB03.md new file mode 100644 index 0000000000..cb884de632 --- /dev/null +++ b/labs-work/app_python/docs/LAB03.md @@ -0,0 +1,235 @@ +# Lab 03 - CI/CD with GitHub Actions + +## Overview + +### Testing Framework: pytest + +**Why pytest over unittest:** + +- **Simple syntax**: No boilerplate code required, plain `assert` statements work +- **Powerful fixtures**: Dependency injection pattern for test setup (e.g., Flask test client) +- **Plugin ecosystem**: pytest-cov for coverage, pytest-xdist for parallel execution +- **Better output**: Detailed failure messages with context +- **Wide adoption**: Industry standard for Python testing + +### Endpoints Tested + +| Endpoint | Tests Count | What's Validated | +| ------------- | ----------- | ---------------------------------------------------------------------------------------------- | +| `GET /` | 8 tests | Response structure, data types, required fields (service, system, runtime, request, endpoints) | +| `GET /health` | 5 tests | Status code, JSON format, health status, timestamp, uptime | +| 404 handler | 4 tests | Error response format, error message, path inclusion | + +**Total: 17 tests** covering all application functionality. 
+ +### Workflow Triggers + +```yaml +on: + push: + branches: [main, master] + paths: ['app_python/**', '.github/workflows/python-ci.yml'] + pull_request: + branches: [main, master] + paths: ['app_python/**', '.github/workflows/python-ci.yml'] +``` + +**Rationale:** + +- **Push to main/master**: Deploys to production (builds and pushes Docker image) +- **Pull requests**: Validates changes before merge (runs tests, lint, security scan) +- **Path filters**: Only triggers when relevant files change (saves CI minutes, avoids unnecessary runs for docs-only changes) + +### Versioning Strategy: CalVer (YYYY.MM.DD) + +**Why CalVer over SemVer:** + +- This is a **continuously deployed service**, not a library with breaking changes +- Date-based versions clearly indicate **when** code was deployed +- No need for manual version management or conventional commits parsing +- Easy to track deployment timeline + +**Docker tags generated:** + +1. `2026.02.12` - CalVer date tag +2. `latest` - Always points to most recent build +3. `abc1234` - Short git SHA for exact code traceability + +--- + +## Workflow Evidence + +### 1. Successful Workflow Run + +![workflow-run](screenshots/workflow-run.png) + +**Link to workflow run:** `https://github.com/mashfeii/DevOps-Core-Course/actions/runs/22034786861` + +--- + +### 2. Tests Passing Locally + +![tests-passing](screenshots/tests-passing.png) + +--- + +### 3. Coverage Report + +![coverage-report](screenshots/coverage-report.png) + +**Coverage analysis:** + +- **app.py**: ~90% coverage +- **Not covered**: Lines 116-117, 124-126 (500 error handler, main block) +- **Why acceptable**: Main block only runs when executed directly, not during tests; 500 handler is difficult to trigger without mocking + +--- + +### 4. Docker Hub Images + +![docker-hub](screenshots/docker-hub-python.png) + +**Docker Hub URL:** https://hub.docker.com/r/mashfeii/devops-info-service + +--- + +### 5. 
Status Badge Working + +![status-badge](screenshots/status-badge.png) + +--- + +### 6. Codecov Dashboard + +![codecov-dashboard](screenshots/codecov-dashboard.png) + +--- + +### 7. Snyk Security Scan + +![snyk-scan](screenshots/snyk-scan.png) + +**Snyk results summary:** + +- Vulnerabilities found: [0 critical, 0 high, 0 medium, 26 low] + +--- + +## Best Practices Implemented + +| Practice | Implementation | Why It Helps | +| ------------------------ | ---------------------------------------------------------------------- | -------------------------------------------------------------------- | +| **Job Dependencies** | `needs: [test, security]` on build job | Prevents pushing broken images; ensures tests pass before deployment | +| **Dependency Caching** | `cache: 'pip'` in setup-python action | Reduces workflow time by ~45s; avoids re-downloading packages | +| **Docker Layer Caching** | `cache-from/to: type=gha` in build-push-action | Faster Docker builds by reusing unchanged layers | +| **Path Filters** | `paths: ['app_python/**']` | Saves CI minutes; only runs when relevant code changes | +| **Conditional Push** | `if: github.event_name == 'push' && github.ref == 'refs/heads/master'` | Only deploys on merge to master, not on PRs | +| **Fail Fast** | Default pytest/job behavior | Stops workflow on first failure, saves time | +| **Coverage Threshold** | `--cov-fail-under=70` | Enforces minimum test coverage; prevents regression | +| **Security Scanning** | Snyk integration with `severity-threshold=high` | Catches known vulnerabilities in dependencies early | + +## Key Decisions + +### Versioning: CalVer vs SemVer + +| Aspect | CalVer (Chosen) | SemVer | +| ---------- | ------------------------- | -------------------------------------- | +| Format | 2026.02.12 | v1.2.3 | +| Automation | Fully automated from date | Requires commit parsing or manual tags | +| Use case | Continuous deployment | Library releases with breaking changes | +| Clarity | When was it 
deployed? | What changed? | + +**Decision:** CalVer chosen because this is a service that deploys continuously, not a library where breaking changes need explicit versioning. + +### Docker Tags Strategy + +``` +mashfeii/devops-info-service:2026.02.12 # When was it built +mashfeii/devops-info-service:latest # Quick local testing +mashfeii/devops-info-service:abc1234 # Exact commit reference +``` + +**Rationale:** + +- **CalVer tag**: Primary production reference, clear deployment timeline +- **latest**: Convenience for local development, always points to newest +- **SHA tag**: Enables exact code traceability for debugging + +### Workflow Triggers + +| Trigger | Action | Why | +| -------------- | ---------------------------------------------- | ------------------------------------------ | +| Push to master | Full pipeline (test → security → build → push) | Deploy validated code | +| Pull request | Test + Security only | Validate before merge, no deploy | +| Path filter | Only app_python/\*\* | Don't run CI for docs or unrelated changes | + +### Test Coverage + +- **Current coverage:** ~96% +- **Threshold set:** 70% +- **What's covered:** All endpoints, response structures, error handlers +- **What's not covered:** Main execution block (`if __name__ == '__main__'`), logging statements +- **Acceptable because:** Main block is entry point only; logging is side effect + +## Challenges and Solutions + +### Challenge 1: Flask Test Client Context + +**Problem:** Tests failed initially because `request` object wasn't available outside request context. + +**Solution:** Used `app.test_client()` context manager which properly initializes request context for testing. + +```python +@pytest.fixture +def client(app): + return app.test_client() +``` + +### Challenge 2: Coverage File Path in CI + +**Problem:** Codecov couldn't find `coverage.xml` because working directory was `app_python/`. 
+ +**Solution:** Specified relative path in codecov action: + +```yaml +files: app_python/coverage.xml +``` + +### Challenge 3: Path Filters Not Triggering + +**Problem:** Initially workflows weren't respecting path filters. + +**Solution:** Ensured workflow file itself is included in paths: + +```yaml +paths: + - 'app_python/**' + - '.github/workflows/python-ci.yml' # Important! +``` + +--- + +## Account Setup Guide + +### Snyk Setup + +1. Go to https://snyk.io and sign up with GitHub +2. Navigate to Account Settings → API Token +3. Copy the token +4. In GitHub repo: Settings → Secrets and variables → Actions → New repository secret +5. Name: `SNYK_TOKEN`, Value: [paste token] + +### Codecov Setup + +1. Go to https://codecov.io and sign up with GitHub +2. Add your repository +3. Go to Settings for your repo, copy the Upload Token +4. In GitHub repo: Settings → Secrets and variables → Actions → New repository secret +5. Name: `CODECOV_TOKEN`, Value: [paste token] + +### Docker Hub Token + +1. Go to https://hub.docker.com → Account Settings → Security +2. New Access Token, name it "github-actions" +3. Copy immediately (won't be shown again) +4. 
Add secrets: `DOCKERHUB_USERNAME`=mashfeii, `DOCKERHUB_TOKEN`=[token] diff --git a/labs-work/app_python/docs/screenshots/codecov-dashboard.png b/labs-work/app_python/docs/screenshots/codecov-dashboard.png new file mode 100644 index 0000000000..7d267f8b03 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/codecov-dashboard.png differ diff --git a/labs-work/app_python/docs/screenshots/coverage-report.png b/labs-work/app_python/docs/screenshots/coverage-report.png new file mode 100644 index 0000000000..82b4edcd3b Binary files /dev/null and b/labs-work/app_python/docs/screenshots/coverage-report.png differ diff --git a/labs-work/app_python/docs/screenshots/docker-hub-python.png b/labs-work/app_python/docs/screenshots/docker-hub-python.png new file mode 100644 index 0000000000..7555b23322 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/docker-hub-python.png differ diff --git a/labs-work/app_python/docs/screenshots/docker_hub.png b/labs-work/app_python/docs/screenshots/docker_hub.png new file mode 100644 index 0000000000..e7d9d8db7f Binary files /dev/null and b/labs-work/app_python/docs/screenshots/docker_hub.png differ diff --git a/labs-work/app_python/docs/screenshots/docker_push.png b/labs-work/app_python/docs/screenshots/docker_push.png new file mode 100644 index 0000000000..298e3c8fb5 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/docker_push.png differ diff --git a/labs-work/app_python/docs/screenshots/python-implementation.png b/labs-work/app_python/docs/screenshots/python-implementation.png new file mode 100644 index 0000000000..302ccb7bf0 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/python-implementation.png differ diff --git a/labs-work/app_python/docs/screenshots/python_build.png b/labs-work/app_python/docs/screenshots/python_build.png new file mode 100644 index 0000000000..aac54d600e Binary files /dev/null and b/labs-work/app_python/docs/screenshots/python_build.png differ diff --git 
a/labs-work/app_python/docs/screenshots/python_size.png b/labs-work/app_python/docs/screenshots/python_size.png new file mode 100644 index 0000000000..ed473bb079 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/python_size.png differ diff --git a/labs-work/app_python/docs/screenshots/run_curl_python.png b/labs-work/app_python/docs/screenshots/run_curl_python.png new file mode 100644 index 0000000000..094020d728 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/run_curl_python.png differ diff --git a/labs-work/app_python/docs/screenshots/snyk-scan.png b/labs-work/app_python/docs/screenshots/snyk-scan.png new file mode 100644 index 0000000000..fc6eb405b9 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/snyk-scan.png differ diff --git a/labs-work/app_python/docs/screenshots/status-badge.png b/labs-work/app_python/docs/screenshots/status-badge.png new file mode 100644 index 0000000000..33310d9c62 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/status-badge.png differ diff --git a/labs-work/app_python/docs/screenshots/tests-passing.png b/labs-work/app_python/docs/screenshots/tests-passing.png new file mode 100644 index 0000000000..83bd44872e Binary files /dev/null and b/labs-work/app_python/docs/screenshots/tests-passing.png differ diff --git a/labs-work/app_python/docs/screenshots/workflow-run.png b/labs-work/app_python/docs/screenshots/workflow-run.png new file mode 100644 index 0000000000..40837d6716 Binary files /dev/null and b/labs-work/app_python/docs/screenshots/workflow-run.png differ diff --git a/labs-work/app_python/requirements-dev.txt b/labs-work/app_python/requirements-dev.txt new file mode 100644 index 0000000000..b11e7bfa48 --- /dev/null +++ b/labs-work/app_python/requirements-dev.txt @@ -0,0 +1,3 @@ +pytest>=8.0.0 +pytest-cov>=4.1.0 +ruff>=0.4.0 diff --git a/labs-work/app_python/requirements.txt b/labs-work/app_python/requirements.txt new file mode 100644 index 0000000000..22ac75b399 
--- /dev/null +++ b/labs-work/app_python/requirements.txt @@ -0,0 +1 @@ +Flask==3.1.0 diff --git a/labs-work/app_python/tests/__init__.py b/labs-work/app_python/tests/__init__.py new file mode 100644 index 0000000000..3d9930fa2e --- /dev/null +++ b/labs-work/app_python/tests/__init__.py @@ -0,0 +1 @@ +"""test package for devops info service""" diff --git a/labs-work/app_python/tests/conftest.py b/labs-work/app_python/tests/conftest.py new file mode 100644 index 0000000000..846a2dc5c0 --- /dev/null +++ b/labs-work/app_python/tests/conftest.py @@ -0,0 +1,26 @@ +"""Pytest configuration and fixtures for devops-info-service tests.""" + +import pytest + +from app import app as flask_app + + +@pytest.fixture +def app(): + """Create application for testing.""" + flask_app.config.update({ + 'TESTING': True, + }) + yield flask_app + + +@pytest.fixture +def client(app): + """Create a test client for the Flask application.""" + return app.test_client() + + +@pytest.fixture +def runner(app): + """Create a test CLI runner.""" + return app.test_cli_runner() diff --git a/labs-work/app_python/tests/test_app.py b/labs-work/app_python/tests/test_app.py new file mode 100644 index 0000000000..adad841bdc --- /dev/null +++ b/labs-work/app_python/tests/test_app.py @@ -0,0 +1,165 @@ +"""Unit tests for devops-info-service Flask application.""" + +import json + + +class TestIndexEndpoint: + """Tests for GET / endpoint.""" + + def test_index_returns_200(self, client): + """Test that index endpoint returns HTTP 200.""" + response = client.get('/') + assert response.status_code == 200 + + def test_index_returns_json(self, client): + """Test that index endpoint returns JSON content type.""" + response = client.get('/') + assert response.content_type == 'application/json' + + def test_index_contains_service_info(self, client): + """Test that response contains service information with all required fields.""" + response = client.get('/') + data = json.loads(response.data) + + assert 'service' in 
data + assert 'name' in data['service'] + assert 'version' in data['service'] + assert 'description' in data['service'] + assert 'framework' in data['service'] + + assert data['service']['name'] == 'devops-info-service' + assert data['service']['framework'] == 'Flask' + + def test_index_contains_system_info(self, client): + """Test that response contains system information with all required fields.""" + response = client.get('/') + data = json.loads(response.data) + + assert 'system' in data + assert 'hostname' in data['system'] + assert 'platform' in data['system'] + assert 'platform_version' in data['system'] + assert 'architecture' in data['system'] + assert 'cpu_count' in data['system'] + assert 'python_version' in data['system'] + + def test_index_contains_runtime_info(self, client): + """Test that response contains runtime information with all required fields.""" + response = client.get('/') + data = json.loads(response.data) + + assert 'runtime' in data + assert 'uptime_seconds' in data['runtime'] + assert 'uptime_human' in data['runtime'] + assert 'current_time' in data['runtime'] + assert 'timezone' in data['runtime'] + + assert data['runtime']['timezone'] == 'UTC' + + def test_index_contains_request_info(self, client): + """Test that response contains request information with all required fields.""" + response = client.get('/') + data = json.loads(response.data) + + assert 'request' in data + assert 'client_ip' in data['request'] + assert 'user_agent' in data['request'] + assert 'method' in data['request'] + assert 'path' in data['request'] + + assert data['request']['method'] == 'GET' + assert data['request']['path'] == '/' + + def test_index_contains_endpoints(self, client): + """Test that response contains endpoints list with at least 2 items.""" + response = client.get('/') + data = json.loads(response.data) + + assert 'endpoints' in data + assert isinstance(data['endpoints'], list) + assert len(data['endpoints']) >= 2 + + paths = [ep['path'] for ep 
in data['endpoints']] + assert '/' in paths + assert '/health' in paths + + def test_index_data_types(self, client): + """Test that response fields have correct data types.""" + response = client.get('/') + data = json.loads(response.data) + + assert isinstance(data['runtime']['uptime_seconds'], int) + assert isinstance(data['system']['cpu_count'], int) + assert isinstance(data['service']['name'], str) + assert isinstance(data['system']['hostname'], str) + assert isinstance(data['runtime']['uptime_human'], str) + + +class TestHealthEndpoint: + """Tests for GET /health endpoint.""" + + def test_health_returns_200(self, client): + """Test that health endpoint returns HTTP 200.""" + response = client.get('/health') + assert response.status_code == 200 + + def test_health_returns_json(self, client): + """Test that health endpoint returns JSON content type.""" + response = client.get('/health') + assert response.content_type == 'application/json' + + def test_health_contains_required_fields(self, client): + """Test that health response contains all required fields.""" + response = client.get('/health') + data = json.loads(response.data) + + assert 'status' in data + assert 'timestamp' in data + assert 'uptime_seconds' in data + + def test_health_status_is_healthy(self, client): + """Test that health status is 'healthy'.""" + response = client.get('/health') + data = json.loads(response.data) + + assert data['status'] == 'healthy' + + def test_health_uptime_is_non_negative_integer(self, client): + """Test that uptime_seconds is a non-negative integer.""" + response = client.get('/health') + data = json.loads(response.data) + + assert isinstance(data['uptime_seconds'], int) + assert data['uptime_seconds'] >= 0 + + +class TestErrorHandlers: + """Tests for error handlers.""" + + def test_404_returns_not_found(self, client): + """Test that non-existent endpoint returns 404.""" + response = client.get('/nonexistent') + assert response.status_code == 404 + + def 
test_404_returns_json(self, client): + """Test that 404 response is JSON.""" + response = client.get('/nonexistent') + assert response.content_type == 'application/json' + + def test_404_contains_error_info(self, client): + """Test that 404 response contains error information.""" + response = client.get('/nonexistent') + data = json.loads(response.data) + + assert 'error' in data + assert 'message' in data + assert 'path' in data + + assert data['error'] == 'Not Found' + + def test_404_includes_requested_path(self, client): + """Test that 404 response includes the requested path.""" + response = client.get('/some/invalid/path') + data = json.loads(response.data) + + assert data['path'] == '/some/invalid/path' diff --git a/labs-work/docs/LAB04.md b/labs-work/docs/LAB04.md new file mode 100644 index 0000000000..71a6a54ad2 --- /dev/null +++ b/labs-work/docs/LAB04.md @@ -0,0 +1,209 @@ +# Lab 04 - Infrastructure as Code + +## Overview + +- Provisioning cloud infrastructure using two IaC tools: Terraform and Pulumi +- Deploying a virtual machine on Yandex Cloud with networking and security +- Comparing declarative (HCL) and imperative (Python) approaches to IaC +- Bonus: CI/CD validation pipeline for Terraform and GitHub repository import + +## Cloud Provider Selection + +### Yandex Cloud + +- Free tier with initial credits for new accounts +- Native Terraform provider (`yandex-cloud/yandex`) with active maintenance +- Pulumi community provider available (`pulumi-yandex`) +- Geographic accessibility and straightforward account setup + +### Resources Created + +| Resource | Specification | +| ---------------- | ----------------------------------------------------- | +| Compute Instance | standard-v2, 2 cores (20% fraction), 1 GB RAM | +| Boot Disk | 10 GB HDD, Ubuntu 24.04 LTS | +| VPC Network | `devops-network` with project labels | +| Subnet | `devops-subnet`, CIDR 10.0.1.0/24, zone ru-central1-a | +| Security Group | SSH (22), HTTP (80), App (5000) ingress; all egress 
| + +## Terraform Implementation + +### Project Structure + +``` +labs-work/terraform/ + .gitignore + .tflint.hcl + main.tf + outputs.tf + variables.tf + github/ + .gitignore + main.tf + outputs.tf +``` + +### Key Configuration Decisions + +- **Provider authentication**: via environment variables (`YC_TOKEN`, `YC_CLOUD_ID`) rather than hardcoded credentials, keeping the configuration portable and secure +- **Dynamic image lookup**: using `data "yandex_compute_image"` with `family = "ubuntu-2404-lts"` to always get the latest image in the family +- **Resource labeling**: all resources tagged with `project = "devops-course"` and `lab = "lab04"` for cost tracking and organization +- **Variable separation**: all configurable values extracted to `variables.tf` with sensible defaults, only `folder_id` is required +- **Security group**: explicit ingress rules for SSH, HTTP, and application ports with full egress allowed + +### Terraform Commands Output + +#### terraform init + +![terraform-init](screenshots/terraform-init.png) + +#### terraform plan + +![terraform-plan](screenshots/terraform-plan.png) + +#### terraform apply + +![terraform-apply](screenshots/terraform-apply.png) + +#### SSH Connection Proof + +![terraform-ssh](screenshots/terraform-ssh.png) + +## Pulumi Implementation + +### Project Structure + +``` +labs-work/pulumi/ + .gitignore + Pulumi.yaml + requirements.txt + __main__.py +``` + +### Key Differences from Terraform + +- **Configuration via Pulumi Config**: uses `pulumi.Config()` with `require()` and `get()` instead of `.tfvars` files +- **SSH key as value**: the public key content is passed directly via config rather than a file path, since Pulumi config supports secret values natively +- **Python native constructs**: conditionals, string formatting, and type hints available directly without HCL workarounds +- **Typed argument classes**: uses `*Args` dataclasses (e.g., `ComputeInstanceResourcesArgs`) for structured, IDE-friendly resource configuration 
+- **Output transformations**: uses `.apply()` for computed value transformations (e.g., building the SSH command from the IP) + +### Pulumi Commands Output + +#### pulumi preview + +![pulumi-preview](screenshots/pulumi-preview.png) + +#### pulumi up + +![pulumi-up](screenshots/pulumi-up.png) + +#### SSH Connection Proof + +![pulumi-ssh](screenshots/pulumi-ssh.png) + +## Terraform vs Pulumi Comparison + +| Aspect | Terraform | Pulumi | +| ---------------- | ----------------------------------------------- | -------------------------------------------- | +| Language | HCL (domain-specific) | Python, Go, TypeScript, etc | +| Learning Curve | Low for simple cases, steeper for complex logic | Depends on language familiarity | +| IDE Support | HCL plugins, limited autocomplete | Full language IDE support | +| State Management | Local file or remote backends | Pulumi Cloud or self-managed backends | +| Debugging | `terraform console`, plan output | Standard language debuggers (pdb, etc) | +| Community | Largest IaC community, most providers | Growing, strong in general-purpose languages | +| Secret Handling | Requires external tools or state encryption | Built-in secret encryption | + +### Ease of Learning + +Terraform's HCL syntax is purpose-built for infrastructure and reads like a configuration file, making it approachable for newcomers without programming experience. Pulumi leverages existing programming languages, so developers already proficient in Python or TypeScript can start quickly without learning a new DSL. However, Pulumi requires understanding both the language and the Pulumi SDK concepts (inputs, outputs, apply), which adds an initial learning step. For teams with mixed backgrounds (ops and dev), Terraform's simpler mental model tends to be easier to adopt uniformly. + +### Code Readability + +Terraform configurations are declarative and read top-to-bottom with clear resource boundaries, making it straightforward to understand what infrastructure exists. 
Pulumi code can be more compact, especially when using loops, conditionals, or helper functions to reduce repetition. However, Pulumi code can also become harder to follow when complex Python logic is mixed with infrastructure declarations. For this lab's relatively simple infrastructure, both tools produce equally readable code. + +### Debugging + +Terraform debugging is limited to `terraform plan` output, `terraform console` for expression evaluation, and verbose logging via `TF_LOG`. Pulumi benefits from standard language debugging tools - breakpoints, stack traces, and print statements work as expected. When a Pulumi deployment fails, the Python traceback often points directly to the issue, whereas Terraform errors reference HCL line numbers and provider-specific messages. For complex infrastructure with dynamic logic, Pulumi's debugging experience is noticeably better. + +### Documentation Quality + +Terraform has extensive official documentation with per-provider resource references, examples, and a large community producing tutorials and modules. The Yandex Cloud Terraform provider documentation covers most resources with usage examples. Pulumi's documentation is well-structured with API references auto-generated from provider schemas, but community-contributed examples are fewer. The Pulumi Yandex provider has minimal documentation compared to its Terraform counterpart, requiring occasional reference to Terraform docs for resource behavior. + +### Recommended Use Cases + +Terraform is the better choice for teams that need broad provider support, established workflows, and a large pool of engineers familiar with the tool. Pulumi excels when teams want to leverage existing programming skills, need complex logic in their infrastructure code, or want built-in secret management. For multi-cloud environments with simple resource definitions, Terraform's maturity and ecosystem give it an edge. 
For application-heavy teams building infrastructure alongside application code, Pulumi's native language integration reduces context switching. In this lab, both tools achieved identical results with comparable effort. + +## Bonus: IaC CI/CD Pipeline + +### Workflow Implementation + +The workflow (`.github/workflows/terraform-ci.yml`) follows the same patterns established by the Python and Go CI pipelines, triggering on changes to `labs-work/terraform/**` files on push and pull request events. + +### Validation Steps + +| Step | Tool | Purpose | +| ------------ | --------------------------------- | -------------------------------------------------------- | +| Format check | `terraform fmt -check -recursive` | Enforce consistent HCL formatting | +| Initialize | `terraform init -backend=false` | Download providers without credentials | +| Validate | `terraform validate` | Check configuration syntax and internal consistency | +| TFLint init | `tflint --init` | Download TFLint plugin rulesets | +| TFLint | `tflint --format compact` | Catch naming issues, deprecated syntax, unused variables | + +No cloud credentials are required for validation - `terraform init -backend=false` downloads the provider plugin and `terraform validate` checks syntax without connecting to any API. + +### Workflow Evidence + +![terraform-ci](screenshots/terraform-ci.png) + +## Bonus: GitHub Repository Import + +### Why Import Existing Resources + +- Bring manually created infrastructure under version-controlled IaC management +- Enable drift detection between actual state and declared configuration +- Allow future changes to the repository settings through code review and CI/CD +- Demonstrate Terraform's ability to adopt existing resources without recreation + +### Import Process + +```bash +cd labs-work/terraform/github +export GITHUB_TOKEN="ghp_..." 
+terraform init +terraform import github_repository.devops_course DevOps-Core-Course +terraform plan # should show no changes if config matches +``` + +### Import Output + +![github-import](screenshots/github-import.png) + +![github-plan](screenshots/github-plan.png) + +## Lab 05 Preparation + +### VM Status + +- The VM created in this lab can be kept running for Lab 05 (Ansible configuration) +- If using Terraform, do not run `terraform destroy` until Lab 05 is complete +- If using Pulumi, do not run `pulumi destroy` until Lab 05 is complete +- Note the public IP from the outputs for use in Ansible inventory + +### Cleanup Status + +- Resources not needed for Lab 05 should be destroyed to avoid charges +- Run `terraform destroy` or `pulumi destroy` after completing Lab 05 + +## Challenges and Solutions + +### Challenge 1: Provider Authentication Without Hardcoding + +**Problem:** Yandex Cloud authentication requires tokens and folder IDs that should not be committed to version control. +**Solution:** Used environment variables (`YC_TOKEN`) for authentication and `terraform.tfvars` (gitignored) for folder-specific values, keeping the codebase clean and portable. + +### Challenge 2: CI Validation Without Cloud Credentials + +**Problem:** The CI pipeline needs to validate Terraform configurations without access to Yandex Cloud credentials. +**Solution:** Used `terraform init -backend=false` to skip backend initialization and `terraform validate` for syntax checking, which only requires the provider plugin, not API access. diff --git a/labs-work/docs/screenshots/docker-blocks-total.png b/labs-work/docs/screenshots/docker-blocks-total.png new file mode 100644 index 0000000000..512b2f5918 Binary files /dev/null and b/labs-work/docs/screenshots/docker-blocks-total.png differ diff --git a/labs-work/docs/screenshots/docker-blocks.png b/labs-work/docs/screenshots/docker-blocks.png new file mode 100644 index 0000000000..a7aa17cb8e Binary files /dev/null and
b/labs-work/docs/screenshots/docker-blocks.png differ diff --git a/labs-work/docs/screenshots/github-import.png b/labs-work/docs/screenshots/github-import.png new file mode 100644 index 0000000000..1a0275b1cf Binary files /dev/null and b/labs-work/docs/screenshots/github-import.png differ diff --git a/labs-work/docs/screenshots/github-plan.png b/labs-work/docs/screenshots/github-plan.png new file mode 100644 index 0000000000..6af3c7cc1e Binary files /dev/null and b/labs-work/docs/screenshots/github-plan.png differ diff --git a/labs-work/docs/screenshots/pulumi-preview.png b/labs-work/docs/screenshots/pulumi-preview.png new file mode 100644 index 0000000000..733a09470e Binary files /dev/null and b/labs-work/docs/screenshots/pulumi-preview.png differ diff --git a/labs-work/docs/screenshots/pulumi-ssh.png b/labs-work/docs/screenshots/pulumi-ssh.png new file mode 100644 index 0000000000..e172ee80e1 Binary files /dev/null and b/labs-work/docs/screenshots/pulumi-ssh.png differ diff --git a/labs-work/docs/screenshots/pulumi-up.png b/labs-work/docs/screenshots/pulumi-up.png new file mode 100644 index 0000000000..72e4f943c4 Binary files /dev/null and b/labs-work/docs/screenshots/pulumi-up.png differ diff --git a/labs-work/docs/screenshots/terraform-apply.png b/labs-work/docs/screenshots/terraform-apply.png new file mode 100644 index 0000000000..9a71c3c14a Binary files /dev/null and b/labs-work/docs/screenshots/terraform-apply.png differ diff --git a/labs-work/docs/screenshots/terraform-ci.png b/labs-work/docs/screenshots/terraform-ci.png new file mode 100644 index 0000000000..91e1b752f5 Binary files /dev/null and b/labs-work/docs/screenshots/terraform-ci.png differ diff --git a/labs-work/docs/screenshots/terraform-init.png b/labs-work/docs/screenshots/terraform-init.png new file mode 100644 index 0000000000..39f266c119 Binary files /dev/null and b/labs-work/docs/screenshots/terraform-init.png differ diff --git a/labs-work/docs/screenshots/terraform-plan.png 
b/labs-work/docs/screenshots/terraform-plan.png new file mode 100644 index 0000000000..61b5157098 Binary files /dev/null and b/labs-work/docs/screenshots/terraform-plan.png differ diff --git a/labs-work/docs/screenshots/terraform-ssh.png b/labs-work/docs/screenshots/terraform-ssh.png new file mode 100644 index 0000000000..b1f77b6eab Binary files /dev/null and b/labs-work/docs/screenshots/terraform-ssh.png differ diff --git a/labs-work/k8s/README.md b/labs-work/k8s/README.md new file mode 100644 index 0000000000..68dae2ae8f --- /dev/null +++ b/labs-work/k8s/README.md @@ -0,0 +1,131 @@ +# Kubernetes Deployment Documentation + +## Architecture Overview + +``` + ┌─────────────────────────────────────┐ + │ Minikube Cluster │ + │ │ + User Request ──► │ Service (NodePort :80) │ + │ │ │ + │ ├──► Pod 1 (Flask :5173) │ + │ ├──► Pod 2 (Flask :5173) │ + │ └──► Pod 3 (Flask :5173) │ + │ │ + │ Deployment: 3 replicas │ + │ Strategy: RollingUpdate │ + │ Resources: 64-128Mi / 50-100m CPU │ + └─────────────────────────────────────┘ +``` + +- 3 Pod replicas of `mashfeii/devops-info-service` behind a NodePort Service +- Each Pod gets 64Mi memory request / 128Mi limit, 50m CPU request / 100m limit +- Traffic enters via NodePort, load-balanced across all ready Pods + +## Manifest Files + +| File | Description | +| ------------------- | ---------------------------------------------------------------------------------- | +| `deployment.yml` | Python app Deployment - 3 replicas, health probes, resource limits, rolling update | +| `service.yml` | NodePort Service exposing port 80 → container port 5173 | +| `deployment-go.yml` | Go app Deployment (bonus) - same patterns, port 8080 | +| `service-go.yml` | NodePort Service for Go app (bonus) | +| `ingress.yml` | Ingress with path-based routing and TLS (bonus) | + +### Key Configuration Choices + +- **3 replicas** - balances availability with local resource usage +- **RollingUpdate** with `maxSurge: 1, maxUnavailable: 0` - ensures zero downtime 
during updates +- **Resource limits** - prevents any single Pod from consuming excessive cluster resources +- **Separate liveness/readiness probes** - liveness restarts unhealthy containers, readiness gates traffic + +## Deployment Evidence + +![kubectl get all](screenshots/all-info.png) + +![kubectl describe deployment](screenshots/kubectl-describe-deployment.png) + +![app curl output](screenshots/app-curl-output.png) + +## Operations Performed + +### Deploy + +```bash +kubectl apply -f k8s/deployment.yml +kubectl apply -f k8s/service.yml +kubectl get pods +``` + +![initial deployment](screenshots/deploy-initial.png) + +### Scaling to 5 Replicas + +```bash +kubectl scale deployment/devops-info-service --replicas=5 +kubectl get pods +kubectl rollout status deployment/devops-info-service +``` + +![scaled to 5 replicas](screenshots/scaled-5.png) + +### Rolling Update + +```bash +kubectl apply -f k8s/deployment.yml +kubectl rollout status deployment/devops-info-service +``` + +![rolling update](screenshots/rolling-update.png) + +### Rollback + +```bash +kubectl rollout history deployment/devops-info-service +kubectl rollout undo deployment/devops-info-service +kubectl rollout status deployment/devops-info-service +``` + +![rollback](screenshots/all-info.png) + +## Production Considerations + +### Health Checks + +- **Liveness probe** (`/health`, period 5s) - restarts containers stuck in a broken state +- **Readiness probe** (`/health`, period 3s) - removes unready Pods from Service endpoints, preventing failed requests + +### Resource Limits Rationale + +- Requests (64Mi/50m) guarantee scheduling baseline +- Limits (128Mi/100m) cap burst usage to protect other workloads +- Python Flask is lightweight; these values are sufficient for the info service + +### Production Improvements + +- Use `Ingress` or `LoadBalancer` instead of NodePort +- Add `PodDisruptionBudget` for maintenance safety +- Implement `HorizontalPodAutoscaler` for dynamic scaling +- Use namespaces to 
isolate environments (dev/staging/prod) +- Add network policies to restrict Pod-to-Pod traffic +- Set up Prometheus + Grafana for monitoring (see Lab 07) + +## Challenges & Solutions + +- **Problem:** Image pull errors when cluster can't reach Docker Hub +- **Solution:** Verify internet connectivity; for local images use `minikube image load` + +- **Problem:** Pods in CrashLoopBackOff +- **Solution:** Check logs with `kubectl logs <pod-name>` and describe with `kubectl describe pod <pod-name>` to identify probe failures or configuration issues + +- **Problem:** Service not routing traffic +- **Solution:** Verify label selectors match between Service and Deployment using `kubectl get endpoints` + +### Debugging Commands + +```bash +kubectl logs <pod-name> +kubectl describe pod <pod-name> +kubectl get events --sort-by='.lastTimestamp' +kubectl get endpoints +``` diff --git a/labs-work/k8s/deployment-go.yml b/labs-work/k8s/deployment-go.yml new file mode 100644 index 0000000000..649665f2d7 --- /dev/null +++ b/labs-work/k8s/deployment-go.yml @@ -0,0 +1,57 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service-go + labels: + app: devops-info-service-go + version: v1 +spec: + replicas: 3 + selector: + matchLabels: + app: devops-info-service-go + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: devops-info-service-go + version: v1 + spec: + containers: + - name: devops-info-service-go + image: mashfeii/devops-info-service-go:latest + ports: + - containerPort: 8080 + protocol: TCP + resources: + requests: + memory: "32Mi" + cpu: "25m" + limits: + memory: "64Mi" + cpu: "50m" + livenessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 5 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 8080 + initialDelaySeconds: 3 + periodSeconds: 3 + timeoutSeconds: 2 + failureThreshold: 3 + env: + - name: HOST + value: "0.0.0.0" + - name: PORT +
value: "8080" diff --git a/labs-work/k8s/deployment.yml b/labs-work/k8s/deployment.yml new file mode 100644 index 0000000000..ef48880cbe --- /dev/null +++ b/labs-work/k8s/deployment.yml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: devops-info-service + labels: + app: devops-info-service + version: v1  # same value as the pod template's version label in this manifest +spec: + replicas: 3 + selector: + matchLabels: + app: devops-info-service + strategy: + type: RollingUpdate + rollingUpdate: + maxSurge: 1 + maxUnavailable: 0 + template: + metadata: + labels: + app: devops-info-service + version: v1 + spec: + containers: + - name: devops-info-service + image: mashfeii/devops-info-service:latest + imagePullPolicy: Never  # NOTE(review): image is never pulled, so it must already exist on the node - confirm this is intended + ports: + - containerPort: 5173 + protocol: TCP + resources: + requests: + memory: "64Mi" + cpu: "50m" + limits: + memory: "128Mi" + cpu: "100m" + livenessProbe: + httpGet: + path: /health + port: 5173 + initialDelaySeconds: 10 + periodSeconds: 5 + timeoutSeconds: 3 + failureThreshold: 3 + readinessProbe: + httpGet: + path: /health + port: 5173 + initialDelaySeconds: 5 + periodSeconds: 3 + timeoutSeconds: 2 + failureThreshold: 3 + env: + - name: HOST + value: "0.0.0.0" + - name: PORT + value: "5173" diff --git a/labs-work/k8s/ingress.yml b/labs-work/k8s/ingress.yml new file mode 100644 index 0000000000..b0a9a43303 --- /dev/null +++ b/labs-work/k8s/ingress.yml @@ -0,0 +1,29 @@ +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: apps-ingress + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / +spec: + tls: + - hosts: + - local.example.com + secretName: tls-secret + rules: + - host: local.example.com + http: + paths: + - path: /app1 + pathType: Prefix + backend: + service: + name: devops-info-service + port: + number: 80 + - path: /app2 + pathType: Prefix + backend: + service: + name: devops-info-service-go + port: + number: 80 diff --git a/labs-work/k8s/screenshots/all-info.png b/labs-work/k8s/screenshots/all-info.png new file mode 100644 index
0000000000..a851870b48 Binary files /dev/null and b/labs-work/k8s/screenshots/all-info.png differ diff --git a/labs-work/k8s/screenshots/app-curl-output.png b/labs-work/k8s/screenshots/app-curl-output.png new file mode 100644 index 0000000000..a38ef40318 Binary files /dev/null and b/labs-work/k8s/screenshots/app-curl-output.png differ diff --git a/labs-work/k8s/screenshots/deploy-initial.png b/labs-work/k8s/screenshots/deploy-initial.png new file mode 100644 index 0000000000..cab9a95c4c Binary files /dev/null and b/labs-work/k8s/screenshots/deploy-initial.png differ diff --git a/labs-work/k8s/screenshots/kubectl-describe-deployment.png b/labs-work/k8s/screenshots/kubectl-describe-deployment.png new file mode 100644 index 0000000000..89dd0f416b Binary files /dev/null and b/labs-work/k8s/screenshots/kubectl-describe-deployment.png differ diff --git a/labs-work/k8s/screenshots/kubectl-get-all.png b/labs-work/k8s/screenshots/kubectl-get-all.png new file mode 100644 index 0000000000..f270d827c9 Binary files /dev/null and b/labs-work/k8s/screenshots/kubectl-get-all.png differ diff --git a/labs-work/k8s/screenshots/kubectl-get-pods-svc.png b/labs-work/k8s/screenshots/kubectl-get-pods-svc.png new file mode 100644 index 0000000000..eaa8f0fd52 Binary files /dev/null and b/labs-work/k8s/screenshots/kubectl-get-pods-svc.png differ diff --git a/labs-work/k8s/screenshots/rolling-update.png b/labs-work/k8s/screenshots/rolling-update.png new file mode 100644 index 0000000000..83ae085f19 Binary files /dev/null and b/labs-work/k8s/screenshots/rolling-update.png differ diff --git a/labs-work/k8s/screenshots/scaled-5.png b/labs-work/k8s/screenshots/scaled-5.png new file mode 100644 index 0000000000..a26951af07 Binary files /dev/null and b/labs-work/k8s/screenshots/scaled-5.png differ diff --git a/labs-work/k8s/service-go.yml b/labs-work/k8s/service-go.yml new file mode 100644 index 0000000000..a50d8049b5 --- /dev/null +++ b/labs-work/k8s/service-go.yml @@ -0,0 +1,14 @@ +apiVersion: v1 
+kind: Service +metadata: + name: devops-info-service-go + labels: + app: devops-info-service-go +spec: + type: NodePort + selector: + app: devops-info-service-go + ports: + - protocol: TCP + port: 80 + targetPort: 8080 diff --git a/labs-work/k8s/service.yml b/labs-work/k8s/service.yml new file mode 100644 index 0000000000..3a19bf1d4d --- /dev/null +++ b/labs-work/k8s/service.yml @@ -0,0 +1,14 @@ +apiVersion: v1 +kind: Service +metadata: + name: devops-info-service + labels: + app: devops-info-service +spec: + type: NodePort + selector: + app: devops-info-service + ports: + - protocol: TCP + port: 80 + targetPort: 5173 diff --git a/labs-work/monitoring/.gitignore b/labs-work/monitoring/.gitignore new file mode 100644 index 0000000000..4c49bd78f1 --- /dev/null +++ b/labs-work/monitoring/.gitignore @@ -0,0 +1 @@ +.env diff --git a/labs-work/monitoring/docker-compose.yml b/labs-work/monitoring/docker-compose.yml new file mode 100644 index 0000000000..839f98c98f --- /dev/null +++ b/labs-work/monitoring/docker-compose.yml @@ -0,0 +1,119 @@ +services: + loki: + image: grafana/loki:3.0.0 + container_name: loki + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - loki + healthcheck: + test: ["CMD-SHELL", "wget --quiet --tries=1 --output-document=- http://localhost:3100/ready || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + memory: 256M + cpus: "0.5" + restart: unless-stopped + + promtail: + image: grafana/promtail:3.0.0 + container_name: promtail + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + networks: + - loki + depends_on: + loki: + condition: service_healthy + deploy: + resources: + limits: + memory: 128M + cpus: "0.25" + 
restart: unless-stopped + + grafana: + image: grafana/grafana:12.3.1 + container_name: grafana + ports: + - "3000:3000" + volumes: + - ./grafana/provisioning:/etc/grafana/provisioning:ro + - grafana-data:/var/lib/grafana + networks: + - loki + env_file: + - .env + environment: + - GF_SECURITY_ADMIN_USER=admin + depends_on: + loki: + condition: service_healthy + healthcheck: + test: ["CMD-SHELL", "wget --quiet --tries=1 --output-document=- http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + memory: 256M + cpus: "0.5" + restart: unless-stopped + + app-python: + build: ../app_python + image: mashfeii/devops-info-service:latest + container_name: app-python + ports: + - "8000:8000" + environment: + - PORT=8000 + networks: + - loki + labels: + logging: "promtail" + app: "devops-python" + deploy: + resources: + limits: + memory: 128M + cpus: "0.25" + restart: unless-stopped + + app-go: + image: mashfeii/devops-info-service-go:latest + container_name: app-go + ports: + - "8001:8080" + networks: + - loki + labels: + logging: "promtail" + app: "devops-go" + deploy: + resources: + limits: + memory: 64M + cpus: "0.25" + restart: unless-stopped + +volumes: + loki-data: + grafana-data: + +networks: + loki: + driver: bridge diff --git a/labs-work/monitoring/docs/LAB07.md b/labs-work/monitoring/docs/LAB07.md new file mode 100644 index 0000000000..091986ebaf --- /dev/null +++ b/labs-work/monitoring/docs/LAB07.md @@ -0,0 +1,254 @@ +# Lab 07 - Observability & Logging with Loki Stack + +## Overview + +- Deployed centralized logging stack: Loki + Promtail + Grafana via Docker Compose +- Added structured JSON logging to the Python app (no new dependencies) +- Auto-provisioned Loki datasource in Grafana +- Hardened for production: resource limits, health checks, retention, auth +- Bonus: Ansible role for automated deployment on remote VM + +## Architecture + +``` +┌─────────────┐ ┌──────────┐ ┌───────────┐ +│ 
app-python │────>│ │ │ │ +│ (JSON logs)│ │ Promtail │────>│ Loki │ +│ app-go │────>│ │ │ │ +└─────────────┘ └──────────┘ └─────┬─────┘ + │ + ┌─────▼─────┐ + │ Grafana │ + │ :3000 │ + └───────────┘ +``` + +Promtail discovers containers via Docker socket, filters by `logging=promtail` label, and ships logs to Loki. Grafana queries Loki for visualization. + +### Service Versions + +| Service | Image | Port | +| -------- | ------------------------------ | ---- | +| Loki | grafana/loki:3.0.0 | 3100 | +| Promtail | grafana/promtail:3.0.0 | 9080 | +| Grafana | grafana/grafana:12.3.1 | 3000 | +| Python | mashfeii/devops-info-service | 8000 | +| Go | mashfeii/devops-info-service-go| 8001 | + +## Setup + +```bash +cd labs-work/monitoring + +# Create .env with Grafana admin password +echo 'GF_SECURITY_ADMIN_PASSWORD=your-secure-password' > .env +echo 'GF_AUTH_ANONYMOUS_ENABLED=false' >> .env + +# Start the stack +docker compose up -d + +# Verify all services +docker compose ps +curl http://localhost:3100/ready # Loki +curl http://localhost:9080/targets # Promtail +curl http://localhost:8000/ # Python app +curl http://localhost:8001/ # Go app +``` + +Open Grafana at `http://localhost:3000` (admin / your password from `.env`). + +## JSON Structured Logging + +Custom `JSONFormatter` class added to `app.py` without new dependencies. Uses `@app.after_request` to log every request uniformly. 
+ +### Log Format + +```json +{ + "timestamp": "2026-03-11T12:00:00+00:00", + "level": "INFO", + "logger": "app", + "message": "Request processed", + "method": "GET", + "path": "/", + "status_code": 200, + "client_ip": "172.18.0.1" +} +``` + +Benefits over plain text: +- Loki can parse JSON fields with `| json` pipeline +- Filter by `status_code`, `method`, `path` in LogQL +- No regex needed for log parsing + +## Loki Configuration + +Key settings in `loki/config.yml`: + +| Setting | Value | Rationale | +| ------------------ | ------------ | ------------------------------------------------ | +| `store` | `tsdb` | Modern index format, better performance than BoltDB | +| `schema` | `v13` | Latest schema, required for TSDB | +| `object_store` | `filesystem` | Simplest backend for single-node deployment | +| `retention_period` | `168h` | 7 days - enough for dev, prevents disk bloat | +| `auth_enabled` | `false` | Single-tenant dev setup | +| `compactor` | enabled | Enforces retention, cleans old chunks | + +## Promtail Configuration + +Docker service discovery (`docker_sd_configs`) with filtering: + +- Connects to Docker socket to discover running containers +- Filters containers by `logging=promtail` label - only monitored apps are scraped +- Relabels `__meta_docker_container_name` to `container` label +- Relabels `__meta_docker_container_label_app` to `app` label for LogQL queries + +## Grafana Provisioning + +Loki datasource is auto-provisioned via `grafana/provisioning/datasources/loki.yml` - no manual setup needed after first boot. Dashboard provider configured at `/var/lib/grafana/dashboards`. 
+ +## Dashboard Panels and LogQL Queries + +### Recommended Panels + +**Live Log Stream** - shows real-time logs from both apps: +```logql +{app=~"devops-python|devops-go"} +``` + +**Error Logs** - filters for warnings and errors: +```logql +{app=~"devops-python|devops-go"} | json | level=~"WARNING|ERROR" +``` + +**Request Rate** (metric from logs): +```logql +count_over_time({app="devops-python"} | json | message="Request processed" [1m]) +``` + +**Status Code Distribution**: +```logql +sum by (status_code) (count_over_time({app="devops-python"} | json | message="Request processed" [5m])) +``` + +**404 Errors**: +```logql +{app="devops-python"} | json | status_code=404 +``` + +## Production Hardening + +| Feature | Implementation | +| ---------------- | --------------------------------------------------- | +| Resource limits | Memory and CPU caps on all services via `deploy` | +| Health checks | Loki (`/ready`) and Grafana (`/api/health`) | +| Auth | Grafana anonymous access disabled, password in `.env`| +| Secrets | `.env` excluded from git via `.gitignore` | +| Retention | 7-day auto-deletion via Loki compactor | +| Restart policy | `unless-stopped` on all services | +| Read-only mounts | Config files mounted as `:ro` | +| Dependency order | Promtail and Grafana wait for Loki health | + +## Testing + +```bash +# Generate traffic for log testing +for i in $(seq 1 10); do curl -s http://localhost:8000/ > /dev/null; done +for i in $(seq 1 5); do curl -s http://localhost:8000/nonexistent > /dev/null; done + +# Verify JSON logs +docker logs app-python 2>&1 | head -5 + +# Query Loki directly +curl -G http://localhost:3100/loki/api/v1/query \ + --data-urlencode 'query={app="devops-python"}' | jq . 
+ +# Check Promtail targets +curl http://localhost:9080/targets +``` + +![compose-up](screenshots/compose-up.png) + +![grafana-explore](screenshots/grafana-explore.png) + +![json-logs](screenshots/json-logs.png) + +![dashboard](screenshots/dashboard.png) + +## Bonus: Ansible Automation + +### Role Structure + +``` +roles/monitoring/ +├── defaults/main.yml # All variables (versions, ports, limits) +├── tasks/ +│ ├── main.yml # Orchestration entry point +│ ├── setup.yml # Create dirs, template configs +│ ├── deploy.yml # Docker Compose deployment +│ └── wipe.yml # Teardown (when monitoring_wipe=true) +├── templates/ +│ ├── docker-compose.yml.j2 +│ ├── loki-config.yml.j2 +│ ├── promtail-config.yml.j2 +│ └── grafana-datasource.yml.j2 +├── handlers/main.yml +└── meta/main.yml # Depends on: docker role +``` + +### Key Variables + +| Variable | Default | Purpose | +| -------------------------------- | ----------- | ------------------------------ | +| `monitoring_loki_version` | `3.0.0` | Loki image tag | +| `monitoring_grafana_version` | `12.3.1` | Grafana image tag | +| `monitoring_loki_retention` | `168h` | Log retention period (7 days) | +| `monitoring_compose_dir` | `/opt/monitoring` | Remote deployment path | +| `monitoring_grafana_admin_password` | from vault | Grafana admin password | +| `monitoring_wipe` | `false` | Set `true` to tear down the stack | + +All service versions, ports, and resource limits are parameterized via Jinja2 templates.
+ +### Running the Playbook + +```bash +cd labs-work/ansible + +# Deploy monitoring stack to VM +ansible-playbook playbooks/deploy-monitoring.yml --ask-vault-pass + +# Verify idempotency (second run should show 0 changed) +ansible-playbook playbooks/deploy-monitoring.yml --ask-vault-pass +``` + +### Evidence + +First run - deploys all services: + +![ansible-deploy](screenshots/ansible-deploy.png) + +Second run - idempotent, zero changes: + +![ansible-idempotent](screenshots/ansible-idempotent.png) + +Grafana accessible on the remote VM: + +![vm-grafana](screenshots/vm-grafana.png) + +## Challenges and Solutions + +**Problem:** Loki 3.0 requires TSDB store and schema v13 - older BoltDB configs fail silently + +**Solution:** Used `store: tsdb` with `schema: v13` and matching `tsdb_shipper` storage config + +--- + +**Problem:** Promtail scrapes all containers by default, flooding Loki with infrastructure logs + +**Solution:** Docker SD filter `logging=promtail` label - only explicitly labeled containers are scraped + +--- + +**Problem:** Grafana needs manual datasource setup on first boot + +**Solution:** Provisioning YAML in `grafana/provisioning/datasources/` auto-configures Loki on startup diff --git a/labs-work/monitoring/docs/screenshots/ansible-deploy.png b/labs-work/monitoring/docs/screenshots/ansible-deploy.png new file mode 100644 index 0000000000..ec5534d288 Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/ansible-deploy.png differ diff --git a/labs-work/monitoring/docs/screenshots/ansible-idempotent.png b/labs-work/monitoring/docs/screenshots/ansible-idempotent.png new file mode 100644 index 0000000000..b4c16fede3 Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/ansible-idempotent.png differ diff --git a/labs-work/monitoring/docs/screenshots/compose-up.png b/labs-work/monitoring/docs/screenshots/compose-up.png new file mode 100644 index 0000000000..f42d1daba8 Binary files /dev/null and 
b/labs-work/monitoring/docs/screenshots/compose-up.png differ diff --git a/labs-work/monitoring/docs/screenshots/dashboard.png b/labs-work/monitoring/docs/screenshots/dashboard.png new file mode 100644 index 0000000000..a805697fdb Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/dashboard.png differ diff --git a/labs-work/monitoring/docs/screenshots/grafana-explore.png b/labs-work/monitoring/docs/screenshots/grafana-explore.png new file mode 100644 index 0000000000..a1141a5efa Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/grafana-explore.png differ diff --git a/labs-work/monitoring/docs/screenshots/json-logs.png b/labs-work/monitoring/docs/screenshots/json-logs.png new file mode 100644 index 0000000000..a0bdca7cd6 Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/json-logs.png differ diff --git a/labs-work/monitoring/docs/screenshots/vm-grafana.png b/labs-work/monitoring/docs/screenshots/vm-grafana.png new file mode 100644 index 0000000000..a1f626c9ee Binary files /dev/null and b/labs-work/monitoring/docs/screenshots/vm-grafana.png differ diff --git a/labs-work/monitoring/grafana/provisioning/dashboards/default.yml b/labs-work/monitoring/grafana/provisioning/dashboards/default.yml new file mode 100644 index 0000000000..aa5f7c1db3 --- /dev/null +++ b/labs-work/monitoring/grafana/provisioning/dashboards/default.yml @@ -0,0 +1,12 @@ +apiVersion: 1 + +providers: + - name: default + orgId: 1 + folder: "" + type: file + disableDeletion: false + editable: true + options: + path: /var/lib/grafana/dashboards + foldersFromFilesStructure: false diff --git a/labs-work/monitoring/grafana/provisioning/datasources/loki.yml b/labs-work/monitoring/grafana/provisioning/datasources/loki.yml new file mode 100644 index 0000000000..050b3c4ac8 --- /dev/null +++ b/labs-work/monitoring/grafana/provisioning/datasources/loki.yml @@ -0,0 +1,9 @@ +apiVersion: 1 + +datasources: + - name: Loki + type: loki + access: proxy + url: 
http://loki:3100 + isDefault: true + editable: false diff --git a/labs-work/monitoring/loki/config.yml b/labs-work/monitoring/loki/config.yml new file mode 100644 index 0000000000..f71531043d --- /dev/null +++ b/labs-work/monitoring/loki/config.yml @@ -0,0 +1,42 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + replication_factor: 1 + ring: + kvstore: + store: inmemory + +schema_config: + configs: + - from: "2024-01-01" + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + filesystem: + directory: /loki/chunks + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + +ingester: + wal: + dir: /loki/wal + +limits_config: + retention_period: 168h + max_query_series: 100000 + +compactor: + working_directory: /loki/compactor + compaction_interval: 10m + retention_enabled: true + retention_delete_delay: 2h + delete_request_store: filesystem diff --git a/labs-work/monitoring/promtail/config.yml b/labs-work/monitoring/promtail/config.yml new file mode 100644 index 0000000000..d45d55080a --- /dev/null +++ b/labs-work/monitoring/promtail/config.yml @@ -0,0 +1,23 @@ +server: + http_listen_port: 9080 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + filters: + - name: label + values: ["logging=promtail"] + relabel_configs: + - source_labels: ["__meta_docker_container_name"] + regex: "/(.*)" + target_label: "container" + - source_labels: ["__meta_docker_container_label_app"] + target_label: "app" diff --git a/labs-work/pulumi/.gitignore b/labs-work/pulumi/.gitignore new file mode 100644 index 0000000000..587903e612 --- /dev/null +++ b/labs-work/pulumi/.gitignore @@ -0,0 +1,6 @@ +venv/ +.venv/ +__pycache__/ +Pulumi.*.yaml +.idea/ +.vscode/ diff --git a/labs-work/pulumi/Pulumi.yaml 
b/labs-work/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..72b6c776ce --- /dev/null +++ b/labs-work/pulumi/Pulumi.yaml @@ -0,0 +1,6 @@ +name: devops-iac +runtime: + name: python + options: + virtualenv: venv +description: DevOps course Lab 04 - Infrastructure as Code with Pulumi diff --git a/labs-work/pulumi/__main__.py b/labs-work/pulumi/__main__.py new file mode 100644 index 0000000000..9355ac7579 --- /dev/null +++ b/labs-work/pulumi/__main__.py @@ -0,0 +1,117 @@ +"""DevOps Lab 04 - Infrastructure as Code with Pulumi (Yandex Cloud).""" + +import pulumi +import pulumi_yandex as yandex + +config = pulumi.Config() +folder_id = config.require("folder_id") +zone = config.get("zone") or "ru-central1-a" +instance_name = config.get("instance_name") or "devops-vm" +ssh_user = config.get("ssh_user") or "ubuntu" +ssh_public_key = config.require("ssh_public_key") + +ubuntu_image = yandex.get_compute_image(family="ubuntu-2404-lts") + +network = yandex.VpcNetwork( + "devops-network", + name="devops-network", + folder_id=folder_id, + labels={ + "project": "devops-course", + "lab": "lab04", + }, +) + +subnet = yandex.VpcSubnet( + "devops-subnet", + name="devops-subnet", + zone=zone, + network_id=network.id, + v4_cidr_blocks=["10.0.1.0/24"], + folder_id=folder_id, + labels={ + "project": "devops-course", + "lab": "lab04", + }, +) + +security_group = yandex.VpcSecurityGroup( + "devops-sg", + name="devops-sg", + network_id=network.id, + folder_id=folder_id, + ingresses=[ + yandex.VpcSecurityGroupIngressArgs( + description="Allow SSH", + protocol="TCP", + port=22, + v4_cidr_blocks=["0.0.0.0/0"], + ), + yandex.VpcSecurityGroupIngressArgs( + description="Allow HTTP", + protocol="TCP", + port=80, + v4_cidr_blocks=["0.0.0.0/0"], + ), + yandex.VpcSecurityGroupIngressArgs( + description="Allow app port", + protocol="TCP", + port=5000, + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], + egresses=[ + yandex.VpcSecurityGroupEgressArgs( + description="Allow all outbound traffic", + 
protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], + labels={ + "project": "devops-course", + "lab": "lab04", + }, +) + +instance = yandex.ComputeInstance( + "devops-vm", + name=instance_name, + platform_id="standard-v2", + zone=zone, + folder_id=folder_id, + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + core_fraction=20, + memory=1, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id=ubuntu_image.id, + size=10, + type="network-hdd", + ), + ), + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + security_group_ids=[security_group.id], + ), + ], + metadata={ + "ssh-keys": f"{ssh_user}:{ssh_public_key}", + }, + labels={ + "project": "devops-course", + "lab": "lab04", + }, +) + +pulumi.export("vm_public_ip", instance.network_interfaces[0].nat_ip_address) +pulumi.export("vm_id", instance.id) +pulumi.export( + "ssh_connection", + instance.network_interfaces[0].nat_ip_address.apply( + lambda ip: f"ssh {ssh_user}@{ip}" + ), +) diff --git a/labs-work/pulumi/requirements.txt b/labs-work/pulumi/requirements.txt new file mode 100644 index 0000000000..ad106a5476 --- /dev/null +++ b/labs-work/pulumi/requirements.txt @@ -0,0 +1,2 @@ +pulumi>=3.0.0,<4.0.0 +pulumi-yandex>=0.13.0 diff --git a/labs-work/terraform/.gitignore b/labs-work/terraform/.gitignore new file mode 100644 index 0000000000..b9678b1785 --- /dev/null +++ b/labs-work/terraform/.gitignore @@ -0,0 +1,15 @@ +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +*.pem +*.key +*.json +crash.log +crash.*.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json diff --git a/labs-work/terraform/.tflint.hcl b/labs-work/terraform/.tflint.hcl new file mode 100644 index 0000000000..427121c3ef --- /dev/null +++ b/labs-work/terraform/.tflint.hcl @@ -0,0 +1,4 @@ +plugin "terraform" { + enabled = true + preset = "recommended" +} 
diff --git a/labs-work/terraform/github/.gitignore b/labs-work/terraform/github/.gitignore new file mode 100644 index 0000000000..b9678b1785 --- /dev/null +++ b/labs-work/terraform/github/.gitignore @@ -0,0 +1,15 @@ +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +*.pem +*.key +*.json +crash.log +crash.*.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json diff --git a/labs-work/terraform/github/main.tf b/labs-work/terraform/github/main.tf new file mode 100644 index 0000000000..a91f7bb542 --- /dev/null +++ b/labs-work/terraform/github/main.tf @@ -0,0 +1,26 @@ +terraform { + required_version = ">= 1.5" + + required_providers { + github = { + source = "integrations/github" + version = "~> 6.0" + } + } +} + +provider "github" {} + +resource "github_repository" "devops_course" { + name = "DevOps-Core-Course" + description = "DevOps course repository - Infrastructure as Code, CI/CD, containers, and more" + visibility = "public" + + has_issues = true + has_wiki = false + has_projects = false + + lifecycle { + prevent_destroy = true + } +} diff --git a/labs-work/terraform/github/outputs.tf b/labs-work/terraform/github/outputs.tf new file mode 100644 index 0000000000..dbd35a4353 --- /dev/null +++ b/labs-work/terraform/github/outputs.tf @@ -0,0 +1,9 @@ +output "repository_url" { + description = "URL of the GitHub repository" + value = github_repository.devops_course.html_url +} + +output "repository_full_name" { + description = "Full name of the GitHub repository (owner/name)" + value = github_repository.devops_course.full_name +} diff --git a/labs-work/terraform/main.tf b/labs-work/terraform/main.tf new file mode 100644 index 0000000000..60d7dc6219 --- /dev/null +++ b/labs-work/terraform/main.tf @@ -0,0 +1,119 @@ +terraform { + required_version = ">= 1.5" + + required_providers { + yandex = { + source = "yandex-cloud/yandex" + version = ">= 0.13" + } + } +} + +provider "yandex" { + zone = var.zone + folder_id = 
var.folder_id +} + +data "yandex_compute_image" "ubuntu" { + family = "ubuntu-2404-lts" +} + +resource "yandex_vpc_network" "devops" { + name = "devops-network" + + labels = { + project = "devops-course" + lab = "lab04" + } +} + +resource "yandex_vpc_subnet" "devops" { + name = "devops-subnet" + zone = var.zone + network_id = yandex_vpc_network.devops.id + v4_cidr_blocks = ["10.0.1.0/24"] + + labels = { + project = "devops-course" + lab = "lab04" + } +} + +resource "yandex_vpc_security_group" "devops" { + name = "devops-sg" + network_id = yandex_vpc_network.devops.id + + ingress { + description = "Allow SSH" + protocol = "TCP" + port = 22 + v4_cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "Allow HTTP" + protocol = "TCP" + port = 80 + v4_cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "Allow app port" + protocol = "TCP" + port = 5000 + v4_cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "Allow Grafana" + protocol = "TCP" + port = 3000 + v4_cidr_blocks = ["0.0.0.0/0"] + } + + egress { + description = "Allow all outbound traffic" + protocol = "ANY" + v4_cidr_blocks = ["0.0.0.0/0"] + } + + labels = { + project = "devops-course" + lab = "lab04" + } +} + +resource "yandex_compute_instance" "devops" { + name = var.instance_name + platform_id = "standard-v2" + zone = var.zone + + resources { + cores = 2 + core_fraction = 20 + memory = 1 + } + + boot_disk { + initialize_params { + image_id = data.yandex_compute_image.ubuntu.id + size = 10 + type = "network-hdd" + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.devops.id + nat = true + security_group_ids = [yandex_vpc_security_group.devops.id] + } + + metadata = { + ssh-keys = "${var.ssh_user}:${file(var.ssh_public_key_path)}" + } + + labels = { + project = "devops-course" + lab = "lab04" + } +} diff --git a/labs-work/terraform/outputs.tf b/labs-work/terraform/outputs.tf new file mode 100644 index 0000000000..e49b43f350 --- /dev/null +++
b/labs-work/terraform/outputs.tf @@ -0,0 +1,19 @@ +output "vm_public_ip" { + description = "Public IP address of the VM" + value = yandex_compute_instance.devops.network_interface[0].nat_ip_address +} + +output "vm_id" { + description = "ID of the compute instance" + value = yandex_compute_instance.devops.id +} + +output "ssh_connection" { + description = "SSH command to connect to the VM" + value = "ssh ${var.ssh_user}@${yandex_compute_instance.devops.network_interface[0].nat_ip_address}" +} + +output "subnet_id" { + description = "ID of the subnet" + value = yandex_vpc_subnet.devops.id +} diff --git a/labs-work/terraform/variables.tf b/labs-work/terraform/variables.tf new file mode 100644 index 0000000000..3df555904e --- /dev/null +++ b/labs-work/terraform/variables.tf @@ -0,0 +1,28 @@ +variable "zone" { + description = "Yandex Cloud availability zone" + type = string + default = "ru-central1-a" +} + +variable "folder_id" { + description = "Yandex Cloud folder ID" + type = string +} + +variable "instance_name" { + description = "Name of the compute instance" + type = string + default = "devops-vm" +} + +variable "ssh_user" { + description = "SSH username for the VM" + type = string + default = "ubuntu" +} + +variable "ssh_public_key_path" { + description = "Path to the SSH public key file" + type = string + default = "~/.ssh/devops-lab04.pub" +}