diff --git a/.github/workflows/ansible-deploy.yml b/.github/workflows/ansible-deploy.yml new file mode 100644 index 0000000000..8dd016cee1 --- /dev/null +++ b/.github/workflows/ansible-deploy.yml @@ -0,0 +1,86 @@ +name: Ansible Deployment (Lab06) + +on: + push: + branches: [main, master, lab6] + paths: + - "lab6c/ansible/**" + - ".github/workflows/ansible-deploy.yml" + pull_request: + branches: [main, master, lab6] + paths: + - "lab6c/ansible/**" + +concurrency: + group: ansible-deploy-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + name: Ansible Lint + runs-on: ubuntu-latest + defaults: + run: + working-directory: lab6c/ansible + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and ansible-lint + run: | + pip install ansible ansible-lint + ansible-galaxy collection install -r requirements.yml + + - name: Run ansible-lint + run: ansible-lint playbooks/*.yml 2>/dev/null || echo "Lint finished (warnings may appear)" + + deploy: + name: Deploy Application + needs: lint + runs-on: ubuntu-latest + if: github.event_name == 'push' + defaults: + run: + working-directory: lab6c/ansible + steps: + - uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: "3.12" + + - name: Install Ansible and collections + run: | + pip install ansible + ansible-galaxy collection install -r requirements.yml + + - name: Setup SSH + run: | + mkdir -p ~/.ssh + echo "${{ secrets.SSH_PRIVATE_KEY }}" > ~/.ssh/id_ed25519 + chmod 600 ~/.ssh/id_ed25519 + ssh-keyscan -H "${{ secrets.VM_HOST }}" >> ~/.ssh/known_hosts 2>/dev/null || true + + - name: Deploy with Ansible + env: + ANSIBLE_VAULT_PASSWORD: ${{ secrets.ANSIBLE_VAULT_PASSWORD }} + run: | + echo "$ANSIBLE_VAULT_PASSWORD" > /tmp/vault_pass + chmod 600 /tmp/vault_pass; trap 'rm -f /tmp/vault_pass' EXIT + ansible-playbook playbooks/deploy.yml \ + --vault-password-file /tmp/vault_pass \ + -e 
ansible_ssh_private_key_file=~/.ssh/id_ed25519 \ + -e ansible_host=${{ secrets.VM_HOST }} \ + -e ansible_user=${{ secrets.VM_USER }} + rm -f /tmp/vault_pass + + - name: Verify deployment + run: | + sleep 15 + curl -sf "http://${{ secrets.VM_HOST }}:5000/health" || { echo "Health check failed"; exit 1; } + curl -sf "http://${{ secrets.VM_HOST }}:5000/" || { echo "Root check failed"; exit 1; } diff --git a/.github/workflows/go-ci.yml b/.github/workflows/go-ci.yml new file mode 100644 index 0000000000..e09a65c488 --- /dev/null +++ b/.github/workflows/go-ci.yml @@ -0,0 +1,76 @@ +name: Go CI (Lab03 Bonus) + +on: + push: + branches: [lab03, main, master] + paths: + - "lab3c/app_go/**" + - ".github/workflows/go-ci.yml" + pull_request: + branches: [lab03, main, master] + paths: + - "lab3c/app_go/**" + - ".github/workflows/go-ci.yml" + +concurrency: + group: go-ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Go + uses: actions/setup-go@v5 + with: + go-version: "1.22" + + - name: golangci-lint + uses: golangci/golangci-lint-action@v6 + with: + working-directory: lab3c/app_go + args: --timeout=5m + + - name: Run tests + working-directory: lab3c/app_go + run: go test ./... 
+ + docker: + name: Build and Push Docker Image + runs-on: ubuntu-latest + needs: test + if: ${{ github.event_name == 'push' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set version (CalVer) + run: echo "VERSION=$(date +%Y.%m.%d)" >> $GITHUB_ENV + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./lab3c/app_go + file: ./lab3c/app_go/Dockerfile + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-go:${{ env.VERSION }} + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-go:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/python-ci.yml b/.github/workflows/python-ci.yml new file mode 100644 index 0000000000..d61adcda2b --- /dev/null +++ b/.github/workflows/python-ci.yml @@ -0,0 +1,109 @@ +name: Python CI (Lab03) + +on: + push: + branches: [lab3, main, master] + paths: + - "lab3c/app_python/**" + - ".github/workflows/python-ci.yml" + pull_request: + branches: [lab3, main, master] + paths: + - "lab3c/app_python/**" + - ".github/workflows/python-ci.yml" + +concurrency: + group: python-ci-${{ github.ref }} + cancel-in-progress: true + +permissions: + contents: read + +jobs: + test: + name: Lint and Test + runs-on: ubuntu-latest + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} + SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} + strategy: + fail-fast: true + matrix: + python-version: ["3.11", "3.12"] + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + cache: "pip" + cache-dependency-path: | + lab3c/app_python/requirements.txt + lab3c/app_python/requirements-dev.txt + + - name: Install dependencies + working-directory: 
lab3c/app_python + run: | + python -m pip install --upgrade pip + pip install -r requirements.txt -r requirements-dev.txt + + - name: Lint (ruff) + working-directory: lab3c/app_python + run: ruff check . + + - name: Run tests with coverage + working-directory: lab3c/app_python + run: pytest --cov=app --cov-report=xml --cov-report=term + + - name: Upload coverage to Codecov + if: ${{ env.CODECOV_TOKEN != '' }} + uses: codecov/codecov-action@v4 + with: + files: lab3c/app_python/coverage.xml + token: ${{ env.CODECOV_TOKEN }} + + - name: Install Snyk CLI + if: ${{ env.SNYK_TOKEN != '' }} + run: npm install -g snyk + + - name: Snyk scan + if: ${{ env.SNYK_TOKEN != '' }} + working-directory: lab3c/app_python + run: snyk test --file=requirements.txt --package-manager=pip + env: + SNYK_TOKEN: ${{ env.SNYK_TOKEN }} + + docker: + name: Build and Push Docker Image + runs-on: ubuntu-latest + needs: test + if: ${{ github.event_name == 'push' }} + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Log in to Docker Hub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set version (CalVer) + run: echo "VERSION=$(date +%Y.%m.%d)" >> $GITHUB_ENV + + - name: Build and push + uses: docker/build-push-action@v6 + with: + context: ./lab3c/app_python + file: ./lab3c/app_python/Dockerfile + push: true + tags: | + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-python:${{ env.VERSION }} + ${{ secrets.DOCKERHUB_USERNAME }}/devops-info-python:latest + cache-from: type=gha + cache-to: type=gha,mode=max diff --git a/.github/workflows/terraform-ci.yml b/.github/workflows/terraform-ci.yml new file mode 100644 index 0000000000..42a0c50418 --- /dev/null +++ b/.github/workflows/terraform-ci.yml @@ -0,0 +1,51 @@ +name: Terraform Validate (Lab04) + +on: + push: + branches: [lab04, main, master] + paths: + - "lab4c/terraform/**" + 
- ".github/workflows/terraform-ci.yml" + pull_request: + branches: [lab04, main, master] + paths: + - "lab4c/terraform/**" + - ".github/workflows/terraform-ci.yml" + +jobs: + validate: + name: Format, Validate, Lint + runs-on: ubuntu-latest + defaults: + run: + working-directory: lab4c/terraform + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Terraform + uses: hashicorp/setup-terraform@v3 + with: + terraform_version: "1.9" + terraform_wrapper: false + + - name: Terraform Format Check + run: terraform fmt -check -recursive + + - name: Terraform Init + run: terraform init -backend=false + + - name: Terraform Validate + run: terraform validate + + - name: Setup TFLint + uses: terraform-linters/setup-tflint@v4 + with: + tflint_version: latest + + - name: TFLint Init + run: tflint --init + + - name: TFLint + run: tflint --format compact + continue-on-error: true diff --git a/.gitignore b/.gitignore index 30d74d2584..11a8dd47f9 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,11 @@ -test \ No newline at end of file +test + +# Ansible +*.retry +.vault_pass +ansible/inventory/*.pyc +__pycache__/ + +# Local lab 5 runtime artifacts +lab5c/ansible/.vault_pass +lab5c/ansible/*.retry \ No newline at end of file diff --git a/lab2c/app_go/.dockerignore b/lab2c/app_go/.dockerignore new file mode 100644 index 0000000000..55a3b7cb13 --- /dev/null +++ b/lab2c/app_go/.dockerignore @@ -0,0 +1,7 @@ +*.exe +*.log +.git/ +.gitignore +.idea/ +.vscode/ +docs/ diff --git a/lab2c/app_go/Dockerfile b/lab2c/app_go/Dockerfile new file mode 100644 index 0000000000..534bac98be --- /dev/null +++ b/lab2c/app_go/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.22 AS builder + +WORKDIR /src + +COPY go.mod ./ +RUN go mod download + +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot + +WORKDIR /app +COPY --from=builder /src/devops-info /app/devops-info + +ENV HOST=0.0.0.0 \ + PORT=5000 + +EXPOSE 5000 
+ +ENTRYPOINT ["/app/devops-info"] diff --git a/lab2c/app_go/README.md b/lab2c/app_go/README.md new file mode 100644 index 0000000000..36e81eb856 --- /dev/null +++ b/lab2c/app_go/README.md @@ -0,0 +1,41 @@ +# DevOps Info Service (Go) + +## Overview +Compiled-language version of the DevOps info service. It exposes the same two endpoints as the Python app and keeps the JSON response structure consistent. + +## Prerequisites +- Go 1.22+ installed + +## Build and Run +Run directly: +```bash +go run main.go +``` + +Build a binary: +```bash +go build -o devops-info +./devops-info +``` + +Windows build/run: +```bash +go build -o devops-info.exe +.\devops-info.exe +``` + +Custom config examples: +```bash +PORT=8080 go run main.go +HOST=127.0.0.1 PORT=3000 go run main.go +``` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | diff --git a/lab2c/app_go/docs/LAB02.md b/lab2c/app_go/docs/LAB02.md new file mode 100644 index 0000000000..71a016acd1 --- /dev/null +++ b/lab2c/app_go/docs/LAB02.md @@ -0,0 +1,131 @@ +# LAB02 - Docker Containerization (Go, Multi-Stage) + +## Multi-Stage Build Strategy +I used a two-stage Dockerfile: +1. **Builder stage** (`golang:1.22`) to compile the binary. +2. **Runtime stage** (`distroless/static-debian12:nonroot`) to run only the binary. + +This keeps the final image small and removes the Go toolchain from production. 
+ +Dockerfile snippet: +```dockerfile +FROM golang:1.22 AS builder +WORKDIR /src +COPY go.mod ./ +RUN go mod download +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot +COPY --from=builder /src/devops-info /app/devops-info +ENTRYPOINT ["/app/devops-info"] +``` + + +Image size output: +```text +tsixphoenix/devops-info-go latest 7fc572b1d863 4 minutes ago 17.7MB +``` + +## Build and Run Evidence +Build output: +```text +docker build -t tsixphoenix/devops-info-go:latest . +[+] Building 35.3s (16/16) FINISHED docker:desktop-linux + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 396B 0.0s + => [internal] load metadata for gcr.io/distroless/static-debian12:nonroot 1.8s + => [internal] load metadata for docker.io/library/golang:1.22 2.4s + => [auth] library/golang:pull token for registry-1.docker.io 0.0s + => [internal] load .dockerignore 0.0s + => => transferring context: 91B 0.0s + => [builder 1/6] FROM docker.io/library/golang:1.22@sha256:1cf6c45ba39db9fd6db16922041d074a63c935556a05c5ccb62d181034df7f02 22.6s + => => resolve docker.io/library/golang:1.22@sha256:1cf6c45ba39db9fd6db16922041d074a63c935556a05c5ccb62d181034df7f02 0.0s + => => sha256:1451027d3c0ee892b96310c034788bbe22b30b8ea2d075edbd09acfeaaaa439f 126B / 126B 0.4s + => => sha256:afa154b433c7f72db064d19e1bcfa84ee196ad29120328f6bdb2c5fbd7b8eeac 69.36MB / 69.36MB 8.8s + => => sha256:3b7f19923e1501f025b9459750b20f5df37af452482f75b91205f345d1c0e1b5 92.33MB / 92.33MB 10.0s + => => sha256:35af2a7690f2b43e7237d1fae8e3f2350dfb25f3249e9cf65121866f9c56c772 64.39MB / 64.39MB 8.1s + => => sha256:32b550be6cb62359a0f3a96bc0dc289f8b45d097eaad275887f163c6780b4108 24.06MB / 24.06MB 3.8s + => => sha256:a492eee5e55976c7d3feecce4c564aaf6f14fb07fdc5019d06f4154eddc93fde 48.48MB / 48.48MB 5.2s + => => extracting sha256:a492eee5e55976c7d3feecce4c564aaf6f14fb07fdc5019d06f4154eddc93fde 2.3s + => => extracting 
sha256:32b550be6cb62359a0f3a96bc0dc289f8b45d097eaad275887f163c6780b4108 0.8s + => => extracting sha256:35af2a7690f2b43e7237d1fae8e3f2350dfb25f3249e9cf65121866f9c56c772 2.5s + => => extracting sha256:3b7f19923e1501f025b9459750b20f5df37af452482f75b91205f345d1c0e1b5 2.0s + => => extracting sha256:afa154b433c7f72db064d19e1bcfa84ee196ad29120328f6bdb2c5fbd7b8eeac 5.1s + => => extracting sha256:1451027d3c0ee892b96310c034788bbe22b30b8ea2d075edbd09acfeaaaa439f 0.0s + => => extracting sha256:4f4fb700ef54461cfa02571ae0db9a0dc1e0cdb5577484a6d75e68dc38e8acc1 0.0s + => [internal] load build context 0.1s + => => transferring context: 6.51kB 0.0s + => [stage-1 1/3] FROM gcr.io/distroless/static-debian12:nonroot@sha256:cba10d7abd3e203428e86f5b2d7fd5eb7d8987c387864ae4996cf97191b33764 2.9s + => => resolve gcr.io/distroless/static-debian12:nonroot@sha256:cba10d7abd3e203428e86f5b2d7fd5eb7d8987c387864ae4996cf97191b33764 0.0s + => => sha256:069d1e267530c2e681fbd4d481553b4d05f98082b18fafac86e7f12996dddd0b 131.91kB / 131.91kB 0.6s + => => sha256:dcaa5a89b0ccda4b283e16d0b4d0891cd93d5fe05c6798f7806781a6a2d84354 314B / 314B 0.4s + => => sha256:4aa0ea1413d37a58615488592a0b827ea4b2e48fa5a77cf707d0e35f025e613f 385B / 385B 0.4s + => => sha256:dd64bf2dd177757451a98fcdc999a339c35dee5d9872d8f4dc69c8f3c4dd0112 80B / 80B 0.4s + => => sha256:52630fc75a18675c530ed9eba5f55eca09b03e91bd5bc15307918bbc1a7e7296 162B / 162B 0.3s + => => sha256:3214acf345c0cc6bbdb56b698a41ccdefc624a09d6beb0d38b5de0b2303ecaf4 123B / 123B 0.3s + => => sha256:7c12895b777bcaa8ccae0605b4de635b68fc32d60fa08f421dc3818bf55ee212 188B / 188B 0.3s + => => sha256:2780920e5dbfbe103d03a583ed75345306e572ec5a48cb10361f046767d9f29a 67B / 67B 0.3s + => => sha256:62de241dac5fe19d5f8f4defe034289006ddaa0f2cca735db4718fe2a23e504e 31.24kB / 31.24kB 0.6s + => => sha256:017886f7e1764618ffad6fbd503c42a60076c63adc16355cac80f0f311cae4c9 544.07kB / 544.07kB 0.7s + => => sha256:bfb59b82a9b65e47d485e53b3e815bca3b3e21a095bd0cb88ced9ac0b48062bf 13.36kB / 
13.36kB 0.6s + => => sha256:fab8c4b3fa32236a59c44cc504a69b18788d5c17c045691c2d682267ae8cf468 104.22kB / 104.22kB 0.6s + => => extracting sha256:fab8c4b3fa32236a59c44cc504a69b18788d5c17c045691c2d682267ae8cf468 0.1s + => => extracting sha256:bfb59b82a9b65e47d485e53b3e815bca3b3e21a095bd0cb88ced9ac0b48062bf 0.1s + => => extracting sha256:017886f7e1764618ffad6fbd503c42a60076c63adc16355cac80f0f311cae4c9 0.5s + => => extracting sha256:62de241dac5fe19d5f8f4defe034289006ddaa0f2cca735db4718fe2a23e504e 0.1s + => => extracting sha256:2780920e5dbfbe103d03a583ed75345306e572ec5a48cb10361f046767d9f29a 0.0s + => => extracting sha256:7c12895b777bcaa8ccae0605b4de635b68fc32d60fa08f421dc3818bf55ee212 0.0s + => => extracting sha256:3214acf345c0cc6bbdb56b698a41ccdefc624a09d6beb0d38b5de0b2303ecaf4 0.1s + => => extracting sha256:52630fc75a18675c530ed9eba5f55eca09b03e91bd5bc15307918bbc1a7e7296 0.1s + => => extracting sha256:dd64bf2dd177757451a98fcdc999a339c35dee5d9872d8f4dc69c8f3c4dd0112 0.0s + => => extracting sha256:4aa0ea1413d37a58615488592a0b827ea4b2e48fa5a77cf707d0e35f025e613f 0.0s + => => extracting sha256:dcaa5a89b0ccda4b283e16d0b4d0891cd93d5fe05c6798f7806781a6a2d84354 0.0s + => => extracting sha256:069d1e267530c2e681fbd4d481553b4d05f98082b18fafac86e7f12996dddd0b 0.0s + => [stage-1 2/3] WORKDIR /app 0.1s + => [builder 2/6] WORKDIR /src 0.5s + => [builder 3/6] COPY go.mod ./ 0.1s + => [builder 4/6] RUN go mod download 0.5s + => [builder 5/6] COPY main.go ./ 0.1s + => [builder 6/6] RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info 8.1s + => [stage-1 3/3] COPY --from=builder /src/devops-info /app/devops-info 0.1s + => exporting to image 0.6s + => => exporting layers 0.4s + => => exporting manifest sha256:39177489cedb41b9d9f566a8be5d09c8ffe938f98b590aa0ebb987f1cf38d7a6 0.0s + => => exporting config sha256:d86ea6d9a836253c87a0ac2232aa6f03cdc8198146f9acdba1f3d31c617bca82 0.0s + => => exporting attestation manifest 
sha256:79e9867f53966cbf5943864985b72aeed88ea8a8349789577aee72d45045e5af 0.0s + => => exporting manifest list sha256:7fc572b1d86304a2634962e06610c7cf4295c4a466b6e52aed34f93550555008 0.0s + => => naming to docker.io/tsixphoenix/devops-info-go:latest 0.0s + => => unpacking to docker.io/tsixphoenix/devops-info-go:latest 0.1s + +``` + +Run output: +```text +docker run --rm -p 5000:5000 --name devops-info-go tsixphoenix/devops-info-go:latest +2026/01/29 12:37:42 Starting DevOps Info Service on 0.0.0.0:5000 +``` + +Endpoint checks: +```text +curl http://localhost:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"Go net/http"},"system":{"hostname":"50a30efde177","platform":"linux","platform_version":"Distroless","architecture":"amd64","cpu_count":12,"python_version":"go1.22.12"},"runtime":{"uptime_seconds":79,"uptime_human":"0 hours, 1 minute","current_time":"2026-01-29T12:39:02Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} + +curl http://localhost:5000/health +{"status":"healthy","timestamp":"2026-01-29T12:39:31Z","uptime_seconds":108} + +2026/01/29 12:39:02 Request: GET / +2026/01/29 12:39:02 Response: GET / -> 200 (418.191µs) +2026/01/29 12:39:31 Request: GET /health +2026/01/29 12:39:31 Response: GET /health -> 200 (114.664µs) +``` + +## Technical Analysis +- The builder stage contains the full Go toolchain; the runtime stage does not. +- If I shipped the builder stage, the image would be much larger and include tools that should not be in production. +- A static binary lets me use a minimal base image. +- The final image runs as a non-root user, which reduces risk. + +## Challenges and Solutions +- I made sure the binary was static (CGO disabled) so it works in a minimal runtime image. 
+- Distroless images do not include a shell, so debugging is done in the builder stage, not in the runtime image. diff --git a/lab2c/app_go/go.mod b/lab2c/app_go/go.mod new file mode 100644 index 0000000000..7a7fcedd1c --- /dev/null +++ b/lab2c/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.22 diff --git a/lab2c/app_go/main.go b/lab2c/app_go/main.go new file mode 100644 index 0000000000..2abcd3938a --- /dev/null +++ b/lab2c/app_go/main.go @@ -0,0 +1,257 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +type Service struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type System struct { + Hostname string `json:"hostname"` + Platform string `json:"platform"` + PlatformVersion string `json:"platform_version"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + PythonVersion string `json:"python_version"` +} + +type Runtime struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type RequestInfo struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type Response struct { + Service Service `json:"service"` + System System `json:"system"` + Runtime Runtime `json:"runtime"` + Request RequestInfo `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +var startTime = time.Now().UTC() + +func main() { + host := getenv("HOST", "0.0.0.0") + port := getenv("PORT", "5000") + addr := net.JoinHostPort(host, port) + + mux := http.NewServeMux() + mux.HandleFunc("/", rootHandler) + 
mux.HandleFunc("/health", healthHandler) + + handler := recoverMiddleware(loggingMiddleware(mux)) + + server := &http.Server{ + Addr: addr, + Handler: handler, + ReadHeaderTimeout: 5 * time.Second, + } + + log.Printf("Starting DevOps Info Service on %s", addr) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("server error: %v", err) + } +} + +func rootHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, uptimeHuman := getUptime() + now := time.Now().UTC() + + hostname, _ := os.Hostname() + response := Response{ + Service: Service{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "Go net/http", + }, + System: System{ + Hostname: hostname, + Platform: runtime.GOOS, + PlatformVersion: getPlatformVersion(), + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + PythonVersion: runtime.Version(), + }, + Runtime: Runtime{ + UptimeSeconds: uptimeSeconds, + UptimeHuman: uptimeHuman, + CurrentTime: now.Format(time.RFC3339), + Timezone: "UTC", + }, + Request: RequestInfo{ + ClientIP: getClientIP(r), + UserAgent: r.UserAgent(), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + + writeJSON(w, http.StatusOK, response) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/health" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, _ := getUptime() + payload := map[string]any{ + "status": "healthy", + "timestamp": time.Now().UTC().Format(time.RFC3339), + "uptime_seconds": uptimeSeconds, + } + + writeJSON(w, http.StatusOK, payload) +} + +func getUptime() (int, 
string) { + seconds := int(time.Since(startTime).Seconds()) + hours := seconds / 3600 + minutes := (seconds % 3600) / 60 + hourLabel := "hours" + if hours == 1 { + hourLabel = "hour" + } + minuteLabel := "minutes" + if minutes == 1 { + minuteLabel = "minute" + } + return seconds, fmt.Sprintf("%d %s, %d %s", hours, hourLabel, minutes, minuteLabel) +} + +func getClientIP(r *http.Request) string { + if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" { + parts := strings.Split(forwarded, ",") + return strings.TrimSpace(parts[0]) + } + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + return host + } + return r.RemoteAddr +} + +func getPlatformVersion() string { + if value := os.Getenv("OS"); value != "" { + return value + } + if data, err := os.ReadFile("/etc/os-release"); err == nil { + for _, line := range strings.Split(string(data), "\n") { + if strings.HasPrefix(line, "PRETTY_NAME=") { + return strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"") + } + } + } + return "unknown" +} + +func writeJSON(w http.ResponseWriter, status int, payload any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + if err := json.NewEncoder(w).Encode(payload); err != nil { + log.Printf("json encode error: %v", err) + } +} + +func writeNotFound(w http.ResponseWriter) { + writeJSON(w, http.StatusNotFound, map[string]string{ + "error": "Not Found", + "message": "Endpoint does not exist", + }) +} + +func writeMethodNotAllowed(w http.ResponseWriter) { + writeJSON(w, http.StatusMethodNotAllowed, map[string]string{ + "error": "Method Not Allowed", + "message": "Only GET is supported for this endpoint", + }) +} + +type statusRecorder struct { + http.ResponseWriter + status int +} + +func (recorder *statusRecorder) WriteHeader(code int) { + recorder.status = code + recorder.ResponseWriter.WriteHeader(code) +} + +func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r 
*http.Request) { + recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK} + start := time.Now() + log.Printf("Request: %s %s", r.Method, r.URL.Path) + next.ServeHTTP(recorder, r) + log.Printf("Response: %s %s -> %d (%s)", r.Method, r.URL.Path, recorder.status, time.Since(start)) + }) +} + +func recoverMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + if err := recover(); err != nil { + log.Printf("panic recovered: %v", err) + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }) + } + }() + next.ServeHTTP(w, r) + }) +} + +func getenv(key, fallback string) string { + if value := os.Getenv(key); value != "" { + return value + } + return fallback +} diff --git a/lab2c/app_python/.dockerignore b/lab2c/app_python/.dockerignore new file mode 100644 index 0000000000..b7738de7b8 --- /dev/null +++ b/lab2c/app_python/.dockerignore @@ -0,0 +1,12 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env +.git/ +.gitignore +.idea/ +.vscode/ +docs/ +tests/ diff --git a/lab2c/app_python/.gitignore b/lab2c/app_python/.gitignore new file mode 100644 index 0000000000..8052e93c8b --- /dev/null +++ b/lab2c/app_python/.gitignore @@ -0,0 +1,14 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab2c/app_python/Dockerfile b/lab2c/app_python/Dockerfile new file mode 100644 index 0000000000..76219e6c10 --- /dev/null +++ b/lab2c/app_python/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN useradd -m -u 10001 appuser + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY --chown=appuser:appuser app.py . 
 + + USER appuser + + EXPOSE 5000 + + CMD ["python", "app.py"] diff --git a/lab2c/app_python/README.md b/lab2c/app_python/README.md new file mode 100644 index 0000000000..742a7439f4 --- /dev/null +++ b/lab2c/app_python/README.md @@ -0,0 +1,72 @@ +# DevOps Info Service (FastAPI) + +## Overview +Small service returning system info about the machine it runs on, plus a health check. + +## Prerequisites +- Python 3.11+ +- pip +- (Optional) venv tool + +## Installation +### Windows +```bash +python -m venv venv +.\venv\Scripts\Activate.ps1 +pip install -r requirements.txt +``` + +### macOS/Linux +```bash +python -m venv venv +source venv/bin/activate +pip install -r requirements.txt +``` + +## Running the Application +```bash +python app.py +``` + +Custom config examples: +```bash +PORT=8080 python app.py +HOST=127.0.0.1 PORT=3000 python app.py +``` + +FastAPI docs: +- `http://localhost:<port>/docs` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | +| `DEBUG` | `False` | Enable auto-reload | + +## Docker +Command patterns (replace the placeholders with your values): + +**Build locally** +```bash +docker build -t <user>/<image>:<tag> . +``` + +**Run container** +```bash +docker run --rm -p <host_port>:5000 --name <container_name> <user>/<image>:<tag> +``` + +**Pull from Docker Hub** +```bash +docker pull <user>/<image>:<tag> +``` + +Optional env overrides: +```bash +docker run --rm -e PORT=5000 -e HOST=0.0.0.0 -p <host_port>:5000 <user>/<image>:<tag> +``` diff --git a/lab2c/app_python/app.py b/lab2c/app_python/app.py new file mode 100644 index 0000000000..8935b94091 --- /dev/null +++ b/lab2c/app_python/app.py @@ -0,0 +1,158 @@ +""" +DevOps Info Service +FastAPI application module. 
+""" + +from __future__ import annotations + +import logging +import os +import platform +import socket +from datetime import datetime, timezone + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse +from starlette.exceptions import HTTPException as StarletteHTTPException + +# Config +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" + +SERVICE_NAME = "devops-info-service" +SERVICE_VERSION = "1.0.0" +SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_FRAMEWORK = "FastAPI" + +START_TIME = datetime.now(timezone.utc) + +logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(levelname)s - %(message)s", +) +logger = logging.getLogger("devops-info-service") + +app = FastAPI( + title="DevOps Info Service", + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + + +def _format_uptime(seconds: int) -> str: + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + hour_label = "hour" if hours == 1 else "hours" + minute_label = "minute" if minutes == 1 else "minutes" + return f"{hours} {hour_label}, {minutes} {minute_label}" + + +def get_uptime() -> dict[str, int | str]: + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + return { + "seconds": seconds, + "human": _format_uptime(seconds), + } + + +def get_system_info() -> dict[str, str | int]: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.release(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def isoformat_utc(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +@app.middleware("http") +async def log_requests(request: Request, call_next): + logger.info("Request: %s %s", request.method, request.url.path) + response = await 
call_next(request) + logger.info("Response: %s %s -> %s", request.method, request.url.path, response.status_code) + return response + + +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + if exc.status_code == 404: + return JSONResponse( + status_code=404, + content={ + "error": "Not Found", + "message": "Endpoint does not exist", + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={"error": exc.detail}, + ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logger.exception("Unhandled error: %s", exc) + return JSONResponse( + status_code=500, + content={ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }, + ) + + +@app.get("/") +async def root(request: Request): + uptime = get_uptime() + now = datetime.now(timezone.utc) + + response = { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": SERVICE_FRAMEWORK, + }, + "system": get_system_info(), + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": isoformat_utc(now), + "timezone": "UTC", + }, + "request": { + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + ], + } + + return response + + +@app.get("/health") +async def health(): + uptime = get_uptime() + return { + "status": "healthy", + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "uptime_seconds": uptime["seconds"], + } + + +if __name__ == "__main__": + logger.info("Starting DevOps Info Service on %s:%s", HOST, PORT) + 
uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG, log_level="info") diff --git a/lab2c/app_python/docs/LAB02.md b/lab2c/app_python/docs/LAB02.md new file mode 100644 index 0000000000..dd91a49278 --- /dev/null +++ b/lab2c/app_python/docs/LAB02.md @@ -0,0 +1,111 @@ +# LAB02 - Docker Containerization (Python) + +## Docker Best Practices Applied +- **Pinned base image**: `python:3.13-slim` keeps the image small and reproducible. +- **Non-root user**: the container runs as `appuser`, so the service does not run as root. +- **Layer caching**: dependencies are installed before copying the app so rebuilds are faster. +- **Minimal copy**: only `requirements.txt` and `app.py` are copied into the image. +- **.dockerignore**: excluded tests, docs, and virtualenvs to keep the build context small. + +Dockerfile snippet: +```dockerfile +FROM python:3.13-slim +WORKDIR /app +RUN useradd -m -u 10001 appuser +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt +COPY --chown=appuser:appuser app.py . +USER appuser +``` + +## Image Information and Decisions +- **Base image choice**: `python:3.13-slim` is a good balance of size and compatibility. +- **Final image size**: `` +- **Layer structure**: dependencies are installed in their own layer to benefit from caching. +- **Optimization choices**: small base image, no extra build tools, only required files copied. + +Image size output: +```text +tsixphoenix/devops-info-python beta 04eec5e16beb 5 minutes ago 228MB +``` + +## Build and Run Process +Build output: +```text +docker build -t tsixphoenix/devops-info-python:beta . 
+[+] Building 16.7s (11/11) FINISHED docker:desktop-linux + => [internal] load build definition from Dockerfile 0.0s + => => transferring dockerfile: 332B 0.0s + => [internal] load metadata for docker.io/library/python:3.13-slim 2.3s + => [internal] load .dockerignore 0.0s + => => transferring context: 133B 0.0s + => [1/6] FROM docker.io/library/python:3.13-slim@sha256:51e1a0a317fdb6e170dc791bbeae63fac5272c82f43958ef74a34e170c6f8b18 2.4s + => => resolve docker.io/library/python:3.13-slim@sha256:51e1a0a317fdb6e170dc791bbeae63fac5272c82f43958ef74a34e170c6f8b18 0.0s + => => sha256:8843ea38a07e15ac1b99c72108fbb492f737032986cc0b65ed351f84e5521879 1.29MB / 1.29MB 0.5s + => => sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d2426ec9c3e9f092ae824d 249B / 249B 0.6s + => => sha256:0bee50492702eb5d822fbcbac8f545a25f5fe173ec8030f57691aefcc283bbc9 11.79MB / 11.79MB 1.5s + => => extracting sha256:8843ea38a07e15ac1b99c72108fbb492f737032986cc0b65ed351f84e5521879 0.3s + => => extracting sha256:0bee50492702eb5d822fbcbac8f545a25f5fe173ec8030f57691aefcc283bbc9 0.8s + => => extracting sha256:36b6de65fd8d6bd36071ea9efa7d078ebdc11ecc23d2426ec9c3e9f092ae824d 0.0s + => [internal] load build context 0.0s + => => transferring context: 4.60kB 0.0s + => [2/6] WORKDIR /app 0.1s + => [3/6] RUN useradd -m -u 10001 appuser 0.6s + => [4/6] COPY requirements.txt . 0.0s + => [5/6] RUN pip install --no-cache-dir -r requirements.txt 8.8s + => [6/6] COPY --chown=appuser:appuser app.py . 
0.1s + => exporting to image 2.1s + => => exporting layers 1.4s + => => exporting manifest sha256:89257312508e9a26af1f7400253d9556816a0fc9230a414836bcedb8a4881c86 0.0s + => => exporting config sha256:a7d85cde725e6fdfb1dfbccbb9daadb4138561a5698ac01f5f6e2780b62994f3 0.0s + => => exporting attestation manifest sha256:82c962563c14aaa47813d2f1b62afb9806c83dbb0519256fd9954a50ea14fd3f 0.0s + => => exporting manifest list sha256:04eec5e16beb90a39cdac694238e9c6301410b6fa987d7b7788c03287ed57da0 0.0s + => => naming to docker.io/tsixphoenix/devops-info-python:beta 0.0s + => => unpacking to docker.io/tsixphoenix/devops-info-python:beta +``` + +Run output (container start): +```text +docker run --rm -p 5000:5000 --name devops-info tsixphoenix/devops-info-python:beta +2026-01-29 12:23:57,799 - INFO - Starting DevOps Info Service on 0.0.0.0:5000 +INFO: Started server process [1] +INFO: Waiting for application startup. +INFO: Application startup complete. +INFO: Uvicorn running on http://0.0.0.0:5000 (Press CTRL+C to quit) +``` + +Endpoint checks: +```text +curl http://localhost:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"d65d9dfde3f9","platform":"Linux","platform_version":"6.6.87.2-microsoft-standard-WSL2","architecture":"x86_64","cpu_count":12,"python_version":"3.13.11"},"runtime":{"uptime_seconds":98,"uptime_human":"0 hours, 1 minute","current_time":"2026-01-29T12:25:35.964833Z","timezone":"UTC"},"request":{"client_ip":"172.17.0.1","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} + +curl http://localhost:5000/health +{"status":"healthy","timestamp":"2026-01-29T12:25:56.660917Z","uptime_seconds":118} + +2026-01-29 12:25:35,964 - INFO - Request: GET / +2026-01-29 12:25:35,965 - INFO - Response: GET / -> 200 +INFO: 
172.17.0.1:54462 - "GET / HTTP/1.1" 200 OK +2026-01-29 12:25:56,659 - INFO - Request: GET /health +2026-01-29 12:25:56,661 - INFO - Response: GET /health -> 200 +INFO: 172.17.0.1:57328 - "GET /health HTTP/1.1" 200 OK +``` + +Docker Hub repository URL: +``` +https://hub.docker.com/repository/docker/tsixphoenix/devops-info-python/general +``` + +Tagging strategy: +``` +version tag +``` + +## Technical Analysis +- The Dockerfile copies `requirements.txt` first so dependency layers are cached between builds. +- If I copied the whole project before installing dependencies, every code change would bust the cache. +- Running as a non-root user reduces risk if a container is compromised. +- `.dockerignore` keeps the build context small, which speeds up the build and reduces image size. + +## Challenges and Solutions +- I verified the app binds to `0.0.0.0` so it is reachable from outside the container. +- I double-checked that only the needed files are copied into the image to avoid bloating it. 
diff --git a/lab2c/app_python/requirements.txt b/lab2c/app_python/requirements.txt new file mode 100644 index 0000000000..792449289f --- /dev/null +++ b/lab2c/app_python/requirements.txt @@ -0,0 +1,2 @@ +fastapi==0.115.0 +uvicorn[standard]==0.32.0 diff --git a/lab2c/app_python/tests/__init__.py b/lab2c/app_python/tests/__init__.py new file mode 100644 index 0000000000..792d600548 --- /dev/null +++ b/lab2c/app_python/tests/__init__.py @@ -0,0 +1 @@ +# diff --git a/lab3c/app_go/.dockerignore b/lab3c/app_go/.dockerignore new file mode 100644 index 0000000000..55a3b7cb13 --- /dev/null +++ b/lab3c/app_go/.dockerignore @@ -0,0 +1,7 @@ +*.exe +*.log +.git/ +.gitignore +.idea/ +.vscode/ +docs/ diff --git a/lab3c/app_go/Dockerfile b/lab3c/app_go/Dockerfile new file mode 100644 index 0000000000..534bac98be --- /dev/null +++ b/lab3c/app_go/Dockerfile @@ -0,0 +1,21 @@ +FROM golang:1.22 AS builder + +WORKDIR /src + +COPY go.mod ./ +RUN go mod download + +COPY main.go ./ +RUN CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o devops-info + +FROM gcr.io/distroless/static-debian12:nonroot + +WORKDIR /app +COPY --from=builder /src/devops-info /app/devops-info + +ENV HOST=0.0.0.0 \ + PORT=5000 + +EXPOSE 5000 + +ENTRYPOINT ["/app/devops-info"] diff --git a/lab3c/app_go/README.md b/lab3c/app_go/README.md new file mode 100644 index 0000000000..36e81eb856 --- /dev/null +++ b/lab3c/app_go/README.md @@ -0,0 +1,41 @@ +# DevOps Info Service (Go) + +## Overview +Compiled-language version of the DevOps info service. It exposes the same two endpoints as the Python app and keeps the JSON response structure consistent. 
+ +## Prerequisites +- Go 1.22+ installed + +## Build and Run +Run directly: +```bash +go run main.go +``` + +Build a binary: +```bash +go build -o devops-info +./devops-info +``` + +Windows build/run: +```bash +go build -o devops-info.exe +.\devops-info.exe +``` + +Custom config examples: +```bash +PORT=8080 go run main.go +HOST=127.0.0.1 PORT=3000 go run main.go +``` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | diff --git a/lab3c/app_go/docs/LAB03.md b/lab3c/app_go/docs/LAB03.md new file mode 100644 index 0000000000..2ae68b20de --- /dev/null +++ b/lab3c/app_go/docs/LAB03.md @@ -0,0 +1,16 @@ +# LAB03 - CI/CD (Go Bonus) + +## Multi-App CI Summary +I added a separate workflow for the Go app with its own path filters. This keeps Python and Go CI independent and avoids running jobs that are not needed. + +## Path Filters +- Go workflow runs only when `lab3c/app_go/**` or its workflow file changes. +- Python workflow runs only when `lab3c/app_python/**` or its workflow file changes. + +## Workflow Evidence +- **Go workflow run:** +- **Docker image on Docker Hub:** + +## Notes +- Go CI uses `go test` and a basic lint step. +- Docker builds use the same CalVer tag scheme as Python. 
diff --git a/lab3c/app_go/go.mod b/lab3c/app_go/go.mod new file mode 100644 index 0000000000..7a7fcedd1c --- /dev/null +++ b/lab3c/app_go/go.mod @@ -0,0 +1,3 @@ +module devops-info-service + +go 1.22 diff --git a/lab3c/app_go/main.go b/lab3c/app_go/main.go new file mode 100644 index 0000000000..2abcd3938a --- /dev/null +++ b/lab3c/app_go/main.go @@ -0,0 +1,257 @@ +package main + +import ( + "encoding/json" + "fmt" + "log" + "net" + "net/http" + "os" + "runtime" + "strings" + "time" +) + +type Service struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + Framework string `json:"framework"` +} + +type System struct { + Hostname string `json:"hostname"` + Platform string `json:"platform"` + PlatformVersion string `json:"platform_version"` + Architecture string `json:"architecture"` + CPUCount int `json:"cpu_count"` + PythonVersion string `json:"python_version"` +} + +type Runtime struct { + UptimeSeconds int `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + CurrentTime string `json:"current_time"` + Timezone string `json:"timezone"` +} + +type RequestInfo struct { + ClientIP string `json:"client_ip"` + UserAgent string `json:"user_agent"` + Method string `json:"method"` + Path string `json:"path"` +} + +type Endpoint struct { + Path string `json:"path"` + Method string `json:"method"` + Description string `json:"description"` +} + +type Response struct { + Service Service `json:"service"` + System System `json:"system"` + Runtime Runtime `json:"runtime"` + Request RequestInfo `json:"request"` + Endpoints []Endpoint `json:"endpoints"` +} + +var startTime = time.Now().UTC() + +func main() { + host := getenv("HOST", "0.0.0.0") + port := getenv("PORT", "5000") + addr := net.JoinHostPort(host, port) + + mux := http.NewServeMux() + mux.HandleFunc("/", rootHandler) + mux.HandleFunc("/health", healthHandler) + + handler := recoverMiddleware(loggingMiddleware(mux)) + + server := &http.Server{ + 
Addr: addr, + Handler: handler, + ReadHeaderTimeout: 5 * time.Second, + } + + log.Printf("Starting DevOps Info Service on %s", addr) + if err := server.ListenAndServe(); err != nil && err != http.ErrServerClosed { + log.Fatalf("server error: %v", err) + } +} + +func rootHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, uptimeHuman := getUptime() + now := time.Now().UTC() + + hostname, _ := os.Hostname() + response := Response{ + Service: Service{ + Name: "devops-info-service", + Version: "1.0.0", + Description: "DevOps course info service", + Framework: "Go net/http", + }, + System: System{ + Hostname: hostname, + Platform: runtime.GOOS, + PlatformVersion: getPlatformVersion(), + Architecture: runtime.GOARCH, + CPUCount: runtime.NumCPU(), + PythonVersion: runtime.Version(), + }, + Runtime: Runtime{ + UptimeSeconds: uptimeSeconds, + UptimeHuman: uptimeHuman, + CurrentTime: now.Format(time.RFC3339), + Timezone: "UTC", + }, + Request: RequestInfo{ + ClientIP: getClientIP(r), + UserAgent: r.UserAgent(), + Method: r.Method, + Path: r.URL.Path, + }, + Endpoints: []Endpoint{ + {Path: "/", Method: "GET", Description: "Service information"}, + {Path: "/health", Method: "GET", Description: "Health check"}, + }, + } + + writeJSON(w, http.StatusOK, response) +} + +func healthHandler(w http.ResponseWriter, r *http.Request) { + if r.URL.Path != "/health" { + writeNotFound(w) + return + } + if r.Method != http.MethodGet { + writeMethodNotAllowed(w) + return + } + + uptimeSeconds, _ := getUptime() + payload := map[string]any{ + "status": "healthy", + "timestamp": time.Now().UTC().Format(time.RFC3339), + "uptime_seconds": uptimeSeconds, + } + + writeJSON(w, http.StatusOK, payload) +} + +func getUptime() (int, string) { + seconds := int(time.Since(startTime).Seconds()) + hours := seconds / 3600 + minutes := (seconds % 3600) / 60 + 
hourLabel := "hours" + if hours == 1 { + hourLabel = "hour" + } + minuteLabel := "minutes" + if minutes == 1 { + minuteLabel = "minute" + } + return seconds, fmt.Sprintf("%d %s, %d %s", hours, hourLabel, minutes, minuteLabel) +} + +func getClientIP(r *http.Request) string { + if forwarded := r.Header.Get("X-Forwarded-For"); forwarded != "" { + parts := strings.Split(forwarded, ",") + return strings.TrimSpace(parts[0]) + } + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err == nil { + return host + } + return r.RemoteAddr +} + +func getPlatformVersion() string { + if value := os.Getenv("OS"); value != "" { + return value + } + if data, err := os.ReadFile("/etc/os-release"); err == nil { + for _, line := range strings.Split(string(data), "\n") { + if strings.HasPrefix(line, "PRETTY_NAME=") { + return strings.Trim(strings.TrimPrefix(line, "PRETTY_NAME="), "\"") + } + } + } + return "unknown" +} + +func writeJSON(w http.ResponseWriter, status int, payload any) { + w.Header().Set("Content-Type", "application/json") + w.WriteHeader(status) + if err := json.NewEncoder(w).Encode(payload); err != nil { + log.Printf("json encode error: %v", err) + } +} + +func writeNotFound(w http.ResponseWriter) { + writeJSON(w, http.StatusNotFound, map[string]string{ + "error": "Not Found", + "message": "Endpoint does not exist", + }) +} + +func writeMethodNotAllowed(w http.ResponseWriter) { + writeJSON(w, http.StatusMethodNotAllowed, map[string]string{ + "error": "Method Not Allowed", + "message": "Only GET is supported for this endpoint", + }) +} + +type statusRecorder struct { + http.ResponseWriter + status int +} + +func (recorder *statusRecorder) WriteHeader(code int) { + recorder.status = code + recorder.ResponseWriter.WriteHeader(code) +} + +func loggingMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + recorder := &statusRecorder{ResponseWriter: w, status: http.StatusOK} + start := time.Now() + 
log.Printf("Request: %s %s", r.Method, r.URL.Path) + next.ServeHTTP(recorder, r) + log.Printf("Response: %s %s -> %d (%s)", r.Method, r.URL.Path, recorder.status, time.Since(start)) + }) +} + +func recoverMiddleware(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + defer func() { + if err := recover(); err != nil { + log.Printf("panic recovered: %v", err) + writeJSON(w, http.StatusInternalServerError, map[string]string{ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }) + } + }() + next.ServeHTTP(w, r) + }) +} + +func getenv(key, fallback string) string { + if value := os.Getenv(key); value != "" { + return value + } + return fallback +} diff --git a/lab3c/app_go/main_test.go b/lab3c/app_go/main_test.go new file mode 100644 index 0000000000..b8ba60fefa --- /dev/null +++ b/lab3c/app_go/main_test.go @@ -0,0 +1,54 @@ +package main + +import ( + "encoding/json" + "net/http" + "net/http/httptest" + "testing" +) + +func TestRootHandlerOK(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/", nil) + rec := httptest.NewRecorder() + + rootHandler(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + t.Fatalf("invalid json: %v", err) + } + + if _, ok := payload["service"]; !ok { + t.Fatal("missing service section") + } + if _, ok := payload["system"]; !ok { + t.Fatal("missing system section") + } + if _, ok := payload["runtime"]; !ok { + t.Fatal("missing runtime section") + } +} + +func TestHealthHandlerOK(t *testing.T) { + req := httptest.NewRequest(http.MethodGet, "/health", nil) + rec := httptest.NewRecorder() + + healthHandler(rec, req) + + if rec.Code != http.StatusOK { + t.Fatalf("expected 200, got %d", rec.Code) + } + + var payload map[string]any + if err := json.Unmarshal(rec.Body.Bytes(), &payload); err != nil { + 
t.Fatalf("invalid json: %v", err) + } + + if payload["status"] != "healthy" { + t.Fatalf("unexpected status: %v", payload["status"]) + } +} diff --git a/lab3c/app_python/.dockerignore b/lab3c/app_python/.dockerignore new file mode 100644 index 0000000000..b7738de7b8 --- /dev/null +++ b/lab3c/app_python/.dockerignore @@ -0,0 +1,12 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env +.git/ +.gitignore +.idea/ +.vscode/ +docs/ +tests/ diff --git a/lab3c/app_python/.gitignore b/lab3c/app_python/.gitignore new file mode 100644 index 0000000000..8052e93c8b --- /dev/null +++ b/lab3c/app_python/.gitignore @@ -0,0 +1,14 @@ +__pycache__/ +*.py[cod] +*.log +venv/ +.venv/ +.env + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab3c/app_python/Dockerfile b/lab3c/app_python/Dockerfile new file mode 100644 index 0000000000..76219e6c10 --- /dev/null +++ b/lab3c/app_python/Dockerfile @@ -0,0 +1,19 @@ +FROM python:3.13-slim + +ENV PYTHONDONTWRITEBYTECODE=1 \ + PYTHONUNBUFFERED=1 + +WORKDIR /app + +RUN useradd -m -u 10001 appuser + +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +COPY --chown=appuser:appuser app.py . + +USER appuser + +EXPOSE 5000 + +CMD ["python", "app.py"] diff --git a/lab3c/app_python/README.md b/lab3c/app_python/README.md new file mode 100644 index 0000000000..e12a3ea6bc --- /dev/null +++ b/lab3c/app_python/README.md @@ -0,0 +1,78 @@ +# DevOps Info Service (FastAPI) + +[![Python CI](https://github.com/TsixPhoenix/DevOps-CC/actions/workflows/python-ci.yml/badge.svg?branch=lab03)](https://github.com/TsixPhoenix/DevOps-CC/actions/workflows/python-ci.yml) + + +## Overview +Small service returning system info about the machine it runs on, plus a health check. 
+ +## Prerequisites +- Python 3.11+ +- pip +- (Optional) venv tool + +## Installation +```bash +python -m venv venv +.\venv\Scripts\Activate.ps1 +pip install -r requirements.txt -r requirements-dev.txt +``` + +## Running the Application +```bash +python app.py +``` + +Custom config examples: +```bash +PORT=8080 python app.py +HOST=127.0.0.1 PORT=3000 python app.py +``` + +FastAPI docs: +- `http://localhost:/docs` + +## Tests +Run locally: +```bash +pytest +``` + +Run with coverage: +```bash +pytest --cov=app --cov-report=term +``` + +## API Endpoints +- `GET /` - Service and system information +- `GET /health` - Health check + +## Configuration +| Variable | Default | Description | +| --- | --- | --- | +| `HOST` | `0.0.0.0` | Bind address for the server | +| `PORT` | `5000` | Port to listen on | +| `DEBUG` | `False` | Enable auto-reload | + +## Docker +Command patterns (replace the placeholders with your values): + +**Build locally** +```bash +docker build -t /: . +``` + +**Run container** +```bash +docker run --rm -p :5000 --name /: +``` + +**Pull from Docker Hub** +```bash +docker pull /: +``` + +Optional env overrides: +```bash +docker run --rm -e PORT=5000 -e HOST=0.0.0.0 -p :5000 /: +``` diff --git a/lab3c/app_python/app.py b/lab3c/app_python/app.py new file mode 100644 index 0000000000..763238476c --- /dev/null +++ b/lab3c/app_python/app.py @@ -0,0 +1,287 @@ +""" +DevOps Info Service +FastAPI application module. 
+""" + +from __future__ import annotations + +import json +import logging +import os +import platform +import socket +import time +from datetime import datetime, timezone + +import uvicorn +from fastapi import FastAPI, Request +from fastapi.responses import JSONResponse, Response +from prometheus_client import ( + CONTENT_TYPE_LATEST, + CollectorRegistry, + Counter, + Gauge, + Histogram, + generate_latest, +) +from starlette.exceptions import HTTPException as StarletteHTTPException + +# Use a custom registry so we don't clash with the default one (avoids "Duplicated timeseries" when the app module is loaded more than once). +_prometheus_registry = CollectorRegistry() + +# Prometheus metrics (RED: Rate, Errors, Duration) +http_requests_total = Counter( + "http_requests_total", + "Total HTTP requests", + ["method", "endpoint", "status"], + registry=_prometheus_registry, +) +http_request_duration_seconds = Histogram( + "http_request_duration_seconds", + "HTTP request duration in seconds", + ["method", "endpoint"], + buckets=(0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1.0, 2.5, 5.0), + registry=_prometheus_registry, +) +http_requests_in_progress = Gauge( + "http_requests_in_progress", + "HTTP requests currently being processed", + registry=_prometheus_registry, +) +# Application-specific metrics +devops_info_endpoint_calls = Counter( + "devops_info_endpoint_calls", + "Endpoint calls for DevOps info service", + ["endpoint"], + registry=_prometheus_registry, +) +devops_info_system_collection_seconds = Histogram( + "devops_info_system_collection_seconds", + "System info collection time in seconds", + buckets=(0.001, 0.005, 0.01, 0.025, 0.05, 0.1), + registry=_prometheus_registry, +) + +# Config +HOST = os.getenv("HOST", "0.0.0.0") +PORT = int(os.getenv("PORT", "5000")) +DEBUG = os.getenv("DEBUG", "False").lower() == "true" + +SERVICE_NAME = "devops-info-service" +SERVICE_VERSION = "1.0.0" +SERVICE_DESCRIPTION = "DevOps course info service" +SERVICE_FRAMEWORK = "FastAPI" + 
+START_TIME = datetime.now(timezone.utc) + +logger = logging.getLogger("devops-info-service") +logger.setLevel(logging.INFO) + +handler = logging.StreamHandler() +handler.setLevel(logging.INFO) +logger.handlers = [handler] + +app = FastAPI( + title="DevOps Info Service", + version=SERVICE_VERSION, + description=SERVICE_DESCRIPTION, +) + + +def _format_uptime(seconds: int) -> str: + hours = seconds // 3600 + minutes = (seconds % 3600) // 60 + hour_label = "hour" if hours == 1 else "hours" + minute_label = "minute" if minutes == 1 else "minutes" + return f"{hours} {hour_label}, {minutes} {minute_label}" + + +def get_uptime() -> dict[str, int | str]: + delta = datetime.now(timezone.utc) - START_TIME + seconds = int(delta.total_seconds()) + return { + "seconds": seconds, + "human": _format_uptime(seconds), + } + + +def get_system_info() -> dict[str, str | int]: + return { + "hostname": socket.gethostname(), + "platform": platform.system(), + "platform_version": platform.release(), + "architecture": platform.machine(), + "cpu_count": os.cpu_count() or 0, + "python_version": platform.python_version(), + } + + +def isoformat_utc(dt: datetime) -> str: + return dt.astimezone(timezone.utc).isoformat().replace("+00:00", "Z") + + +def _normalize_endpoint(path: str) -> str: + """Normalize path for metric labels to keep cardinality low.""" + if path in ("/", "/health", "/metrics"): + return path + return "other" + + +@app.middleware("http") +async def log_requests(request: Request, call_next): + start_time = time.perf_counter() + endpoint = _normalize_endpoint(request.url.path) + http_requests_in_progress.inc() + try: + response = await call_next(request) + status = str(response.status_code) + http_requests_total.labels( + method=request.method, endpoint=endpoint, status=status + ).inc() + http_request_duration_seconds.labels( + method=request.method, endpoint=endpoint + ).observe(time.perf_counter() - start_time) + return response + finally: + http_requests_in_progress.dec() + 
+ +@app.middleware("http") +async def log_requests(request: Request, call_next): + start_time = datetime.now(timezone.utc) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(start_time), + "level": "INFO", + "service": SERVICE_NAME, + "event": "request", + "method": request.method, + "path": request.url.path, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + response = await call_next(request) + end_time = datetime.now(timezone.utc) + logger.info( + json.dumps( + { + "timestamp": isoformat_utc(end_time), + "level": "INFO", + "service": SERVICE_NAME, + "event": "response", + "method": request.method, + "path": request.url.path, + "status": response.status_code, + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + } + ) + ) + return response + + +@app.exception_handler(StarletteHTTPException) +async def http_exception_handler(request: Request, exc: StarletteHTTPException): + if exc.status_code == 404: + return JSONResponse( + status_code=404, + content={ + "error": "Not Found", + "message": "Endpoint does not exist", + }, + ) + return JSONResponse( + status_code=exc.status_code, + content={"error": exc.detail}, + ) + + +@app.exception_handler(Exception) +async def unhandled_exception_handler(request: Request, exc: Exception): + logger.error( + json.dumps( + { + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "level": "ERROR", + "service": SERVICE_NAME, + "event": "exception", + "method": request.method, + "path": request.url.path, + "error": str(exc), + } + ) + ) + return JSONResponse( + status_code=500, + content={ + "error": "Internal Server Error", + "message": "An unexpected error occurred", + }, + ) + + +@app.get("/metrics") +async def metrics(): + """Prometheus metrics endpoint.""" + return Response( + content=generate_latest(_prometheus_registry), + 
media_type=CONTENT_TYPE_LATEST, + ) + + +@app.get("/") +async def root(request: Request): + devops_info_endpoint_calls.labels(endpoint="/").inc() + t0 = time.perf_counter() + sys_info = get_system_info() + devops_info_system_collection_seconds.observe(time.perf_counter() - t0) + uptime = get_uptime() + now = datetime.now(timezone.utc) + + response = { + "service": { + "name": SERVICE_NAME, + "version": SERVICE_VERSION, + "description": SERVICE_DESCRIPTION, + "framework": SERVICE_FRAMEWORK, + }, + "system": sys_info, + "runtime": { + "uptime_seconds": uptime["seconds"], + "uptime_human": uptime["human"], + "current_time": isoformat_utc(now), + "timezone": "UTC", + }, + "request": { + "client_ip": request.client.host if request.client else "unknown", + "user_agent": request.headers.get("user-agent", "unknown"), + "method": request.method, + "path": request.url.path, + }, + "endpoints": [ + {"path": "/", "method": "GET", "description": "Service information"}, + {"path": "/health", "method": "GET", "description": "Health check"}, + {"path": "/metrics", "method": "GET", "description": "Prometheus metrics"}, + ], + } + + return response + + +@app.get("/health") +async def health(): + devops_info_endpoint_calls.labels(endpoint="/health").inc() + uptime = get_uptime() + return { + "status": "healthy", + "timestamp": isoformat_utc(datetime.now(timezone.utc)), + "uptime_seconds": uptime["seconds"], + } + + +if __name__ == "__main__": + logger.info("Starting DevOps Info Service on %s:%s", HOST, PORT) + uvicorn.run("app:app", host=HOST, port=PORT, reload=DEBUG, log_level="info") diff --git a/lab3c/app_python/docs/LAB03.md b/lab3c/app_python/docs/LAB03.md new file mode 100644 index 0000000000..530353eaba --- /dev/null +++ b/lab3c/app_python/docs/LAB03.md @@ -0,0 +1,66 @@ +# LAB03 - CI/CD (Python) + +## 1. Overview +**Testing framework:** I used `pytest`. The syntax is clean, fixtures are easy to work with, and it is the default choice in most Python projects I see. 
+
+**What tests cover:** The tests hit `GET /`, `GET /health`, a 404 case, and helper functions like uptime formatting. I focused on structure and types instead of exact machine values.
+
+**Workflow triggers:** CI runs on push and pull requests to `lab03`, `main`, or `master`, but only when `lab3c/app_python/**` or the workflow file changes.
+
+**Versioning strategy:** I chose CalVer (YYYY.MM.DD). It is simple, and this service is released continuously rather than as a library.
+
+## 2. Workflow Evidence
+Evidence links and outputs from the CI runs:
+- **Successful workflow run:** https://github.com/TsixPhoenix/DevOps-CC/actions/runs/21865003310/job/63103839665
+- **Tests passing locally:**
+pytest
+============================================================================================ test session starts ============================================================================================
+platform win32 -- Python 3.12.2, pytest-9.0.2, pluggy-1.6.0
+rootdir: C:\Users\Phoenix\PycharmProjects\DevOps\DevOps-CC\lab3c\app_python
+configfile: pyproject.toml
+plugins: anyio-4.12.1, cov-7.0.0
+collected 5 items
+
+tests\test_app.py ..... [100%]
+
+============================================================================================= 5 passed in 0.36s =============================================================================================
+- **Docker image on Docker Hub:** https://hub.docker.com/repository/docker/tsixphoenix/devops-info-python/general
+- **Status badge:** Works, shows green check
+
+## 3. Best Practices Implemented
+- **Dependency caching:** `actions/setup-python` caches pip packages to speed up installs.
+- **Job separation:** tests run in one job, Docker build/push depends on test success.
+- **Conditional push:** Docker images only push on `push` events (not on PRs).
+- **Concurrency:** newer runs cancel older runs for the same branch.
+- **Path filters:** CI runs only when the Python app changes (monorepo friendly).
+- **Snyk scanning:** dependency scan runs in CI.
+
+
+Snyk result:
+```
+Run snyk test --file=requirements.txt --package-manager=pip
+
+Testing /home/runner/work/DevOps-CC/DevOps-CC/lab3c/app_python...
+
+Organization: tsixphoenix
+Package manager: pip
+Target file: requirements.txt
+Project name: app_python
+Open source: no
+Project path: /home/runner/work/DevOps-CC/DevOps-CC/lab3c/app_python
+Licenses: enabled
+
+✔ Tested 13 dependencies for known issues, no vulnerable paths found.
+```
+
+## 4. Key Decisions
+**Versioning Strategy:** CalVer fits a small service that ships frequently. It is easy to read and does not require manual version bumps.
+
+**Docker Tags:** The workflow publishes `YYYY.MM.DD` and `latest` tags for the same image.
+
+**Workflow Triggers:** I used path filters to avoid running Python CI when only Go code changes.
+
+**Test Coverage:** Core endpoints and helper functions are tested. I did not try to cover every logging line.
+
+## 5. Challenges
+- Everything was clear because of my prior experience setting up CI/CD pipelines at my company.
diff --git a/lab3c/app_python/pyproject.toml b/lab3c/app_python/pyproject.toml new file mode 100644 index 0000000000..efb9a85312 --- /dev/null +++ b/lab3c/app_python/pyproject.toml @@ -0,0 +1,3 @@ +[tool.ruff] +select = ["E", "F"] +ignore = ["E501"] diff --git a/lab3c/app_python/requirements-dev.txt b/lab3c/app_python/requirements-dev.txt new file mode 100644 index 0000000000..c6610506e3 --- /dev/null +++ b/lab3c/app_python/requirements-dev.txt @@ -0,0 +1,5 @@ +pytest +pytest-cov +requests +ruff +httpx diff --git a/lab3c/app_python/requirements.txt b/lab3c/app_python/requirements.txt new file mode 100644 index 0000000000..d663691063 --- /dev/null +++ b/lab3c/app_python/requirements.txt @@ -0,0 +1,3 @@ +fastapi==0.128.6 +uvicorn[standard]==0.32.0 +prometheus-client==0.23.1 diff --git a/lab3c/app_python/tests/__init__.py b/lab3c/app_python/tests/__init__.py new file mode 100644 index 0000000000..792d600548 --- /dev/null +++ b/lab3c/app_python/tests/__init__.py @@ -0,0 +1 @@ +# diff --git a/lab3c/app_python/tests/test_app.py b/lab3c/app_python/tests/test_app.py new file mode 100644 index 0000000000..ff942f197e --- /dev/null +++ b/lab3c/app_python/tests/test_app.py @@ -0,0 +1,66 @@ +from datetime import datetime, timezone + +from fastapi.testclient import TestClient + +from app import _format_uptime, app, get_system_info, get_uptime, isoformat_utc + + +client = TestClient(app) + + +def test_root_endpoint_structure(): + response = client.get("/") + assert response.status_code == 200 + + data = response.json() + assert "service" in data + assert "system" in data + assert "runtime" in data + assert "request" in data + assert "endpoints" in data + + service = data["service"] + assert service["name"] == "devops-info-service" + assert service["framework"] == "FastAPI" + + system = data["system"] + for key in ["hostname", "platform", "platform_version", "architecture", "cpu_count", "python_version"]: + assert key in system + + runtime = data["runtime"] + assert 
isinstance(runtime["uptime_seconds"], int) + assert isinstance(runtime["uptime_human"], str) + assert runtime["timezone"] == "UTC" + + +def test_health_endpoint_structure(): + response = client.get("/health") + assert response.status_code == 200 + data = response.json() + assert data["status"] == "healthy" + assert isinstance(data["uptime_seconds"], int) + assert "timestamp" in data + + +def test_not_found_returns_json(): + response = client.get("/does-not-exist") + assert response.status_code == 404 + data = response.json() + assert data["error"] == "Not Found" + + +def test_helpers_are_consistent(): + system = get_system_info() + assert system["hostname"] + assert system["platform"] + assert system["python_version"] + + uptime = get_uptime() + assert uptime["seconds"] >= 0 + assert "hours" in uptime["human"] or "hour" in uptime["human"] + + +def test_format_and_iso_helpers(): + assert _format_uptime(3660) == "1 hour, 1 minute" + test_dt = datetime(2024, 1, 1, tzinfo=timezone.utc) + assert isoformat_utc(test_dt) == "2024-01-01T00:00:00Z" diff --git a/lab4c/docs/LAB04.md b/lab4c/docs/LAB04.md new file mode 100644 index 0000000000..5af0ddc8e7 --- /dev/null +++ b/lab4c/docs/LAB04.md @@ -0,0 +1,190 @@ +# LAB04 — Infrastructure as Code (Terraform & Pulumi) + +## 1. Cloud Provider & Infrastructure + +- **Cloud provider:** Yandex Cloud. +- **Rationale:** Used Yandex Cloud for this lab because of the free tier +- **Instance:** standard-v3, 2 cores 20%, 1 GB RAM, 10 GB disk. +- **Zone:** ru-central1-a. +- **Estimated cost:** Effectively $0 with the free tier for this kind of usage. +- **Resources created:** + - 1× VPC network + - 1× subnet + - 1× security group (SSH 22, HTTP 80, 5000) + - 1× compute instance (Ubuntu 22.04) + - Public IP + +## 2. 
Terraform Implementation + +- **Terraform version:** Terraform v1.14.5 +- **Project structure:** `terraform/` — main.tf (provider, Ubuntu image data source, VPC, subnet, security group, instance), variables.tf, outputs.tf, terraform.tfvars (gitignored). Auth via service account key path in tfvars +- **Key decisions:** Variables for folder_id, zone, SSH key path, and SSH CIDR so the same code works across environments. Data source for the latest Ubuntu 22.04 LTS image. Security group restricts SSH to our IP only; HTTP and 5000 are open for the app. +- **Challenges:** Getting auth right at first; I ended up putting the key file path in terraform.tfvars. Also hit the VPC network quota once and had to extend it. + +**Terminal output:** + +- `terraform init`: + ``` +terraform init +Initializing the backend... +Initializing provider plugins... +- Finding yandex-cloud/yandex versions matching "~> 0.100"... +- Installing yandex-cloud/yandex v0.187.0... +- Installed yandex-cloud/yandex v0.187.0 + ``` +- `terraform plan`: + ``` +terraform plan +data.yandex_compute_image.ubuntu: Reading... +data.yandex_compute_image.ubuntu: Read complete after 0s [id=***********] + +Terraform used the selected providers to generate the following execution plan. Resource actions are indicated with the +following symbols: + + create + +Terraform will perform the following actions: + + # yandex_compute_instance.lab4 will be created + + resource "yandex_compute_instance" "lab4" { + ``` +- `terraform apply`: + ``` +terraform apply +data.yandex_compute_image.ubuntu: Reading... +data.yandex_compute_image.ubuntu: Read complete after 0s [id=***********] + +Terraform used the selected providers to generate the following execution plan. 
Resource actions are indicated with the +following symbols: + + create + +Terraform will perform the following actions: + + # yandex_compute_instance.lab4 will be created + + resource "yandex_compute_instance" "lab4" { + ``` +- `SSH to VM`: + ``` +The programs included with the Ubuntu system are free software; +the exact distribution terms for each program are described in the +individual files in /usr/share/doc/*/copyright. + +Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by +applicable law. + +To run a command as administrator (user "root"), use "sudo ". +See "man sudo_root" for details. + +ubuntu@fhm24d5clqr3oh7b101s:~$ + ``` + +## 3. Pulumi Implementation + +- **Pulumi version and language:** Pulumi v3.222.0, Python 3.x. +- **How it differs from Terraform:** Same logical resources (VPC, subnet, security group, VM), but defined in Python. You get normal Python (loops, functions, types) and the same state/plan/apply workflow. +- **Advantages:** Felt easier. Outputs are straightforward. +- **Challenges:** Initial setup took a bit: venv, `setuptools<82` for `pkg_resources`, and provider auth. 
+ +**Terminal output:** + +- `pulumi preview`: + ``` + pulumi preview +Previewing update (dev) + +View in Browser (Ctrl+O): + + Type Name Plan Info + + pulumi:pulumi:Stack lab4c-vm-dev create 2 messages + + ├─ yandex:index:VpcNetwork lab4c-network create + + ├─ yandex:index:VpcSubnet lab4c-subnet create + + ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg create + + └─ yandex:index:ComputeInstance lab4c-vm create +Diagnostics: + pulumi:pulumi:Stack (lab4c-vm-dev): + import pkg_resources + +Outputs: + public_ip : [unknown] + ssh_command: [unknown] + +Resources: + + 5 to create + ``` +- `pulumi up`: + ``` + pulumi up +Previewing update (dev) + +View in Browser (Ctrl+O): + Type Name Plan Info + pulumi:pulumi:Stack lab4c-vm-dev 2 messages + + ├─ yandex:index:VpcNetwork lab4c-network create + + ├─ yandex:index:VpcSubnet lab4c-subnet create + + ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg create + + └─ yandex:index:ComputeInstance lab4c-vm create +Diagnostics: + pulumi:pulumi:Stack (lab4c-vm-dev): + import pkg_resources + + [Pulumi Neo] Would you like help with these diagnostics? + +Outputs: + + public_ip : [unknown] + + ssh_command: [unknown] + +Resources: + + 4 to create + 1 unchanged + +Do you want to perform this update? yes +Updating (dev) + ``` +- SSH to VM: + ``` + The programs included with the Ubuntu system are free software; +the exact distribution terms for each program are described in the +individual files in /usr/share/doc/*/copyright. + +Ubuntu comes with ABSOLUTELY NO WARRANTY, to the extent permitted by +applicable law. + +To run a command as administrator (user "root"), use "sudo ". +See "man sudo_root" for details. + +ubuntu@fhm8nea1kubnsde4ooqn:~$ + ``` + +## 4. Terraform vs Pulumi Comparison + +- **Ease of learning:** Terraform is easier if you only care about “describe resources in a file and apply.” HCL is small and focused. Pulumi is easier if you already know Python and want to use normal code; +- **Code readability:** Both are readable. 
Terraform is very declarative: you see resources and attributes. Pulumi looks like normal code, so you can structure it with variables and functions. +- **Debugging:** With Terraform, you rely on plan/apply messages and sometimes `terraform state`. With Pulumi, you get Python stack traces and can add prints or a debugger; the program runs in your environment, which helps. +- **Documentation:** all services are well documented +- **Use case:** I’d pick Terraform when the team is standardizing on it, when you want maximum portability (HCL, big ecosystem), or when you’re mostly gluing provider resources. I’d pick Pulumi when the team is code-first, when you want to share logic with the rest of your app (same language, tests, refactors), or when you need loops, conditionals, or abstractions that are clumsy in HCL. + +## 5. Lab 5 Preparation & Cleanup + +**VM for Lab 5:** + +- **Keeping VM for Lab 5?** No. +- **Plan for Lab 5:** Will recreate a cloud VM when needed + +**Cleanup status:** +``` +terraform destroy +Destroy complete! Resources: 4 destroyed. 
+``` +and +``` +pulumi destroy +Previewing destroy (dev) + +View in Browser (Ctrl+O): + + Type Name Plan + - pulumi:pulumi:Stack lab4c-vm-dev delete + - ├─ yandex:index:ComputeInstance lab4c-vm delete + - ├─ yandex:index:VpcSubnet lab4c-subnet delete + - ├─ yandex:index:VpcSecurityGroup lab4c-vm-sg delete + - └─ yandex:index:VpcNetwork lab4c-network delete +``` diff --git a/lab4c/pulumi/.gitignore b/lab4c/pulumi/.gitignore new file mode 100644 index 0000000000..25c1585ba2 --- /dev/null +++ b/lab4c/pulumi/.gitignore @@ -0,0 +1,19 @@ +# Pulumi +Pulumi.*.yaml +!Pulumi.yaml +.pulumi/ + +# Python +__pycache__/ +*.py[cod] +venv/ +.venv/ +*.egg-info/ + +# IDE +.idea/ +.vscode/ + +# OS +.DS_Store +Thumbs.db diff --git a/lab4c/pulumi/Pulumi.yaml b/lab4c/pulumi/Pulumi.yaml new file mode 100644 index 0000000000..a3e2d9124e --- /dev/null +++ b/lab4c/pulumi/Pulumi.yaml @@ -0,0 +1,4 @@ +name: lab4c-vm +runtime: python +description: Lab 4 - VM on Yandex Cloud + diff --git a/lab4c/pulumi/README.md b/lab4c/pulumi/README.md new file mode 100644 index 0000000000..5c263e239d --- /dev/null +++ b/lab4c/pulumi/README.md @@ -0,0 +1,50 @@ +# Lab 4 — Pulumi (Yandex Cloud) + +Same infrastructure as the Terraform stack: one VM, VPC, subnet, security group (SSH, HTTP, 5000). + +## Prerequisites + +- Pulumi CLI 3.x +- Python 3.9+ +- Yandex Cloud account (same auth as for Terraform: `YANDEX_TOKEN` or service account key) + +## Config + + +```bash +pulumi config set folder_id your-yandex-folder-id +pulumi config set ssh_cidr "YOUR_IP/32" +pulumi config set ssh_public_key "$(cat ~/.ssh/id_rsa.pub)" +``` + +```powershell +pulumi config set ssh_public_key "$(Get-Content $env:USERPROFILE\.ssh\id_rsa.pub -Raw)" +``` + +Optional: `pulumi config set zone ru-central1-a` + +## Setup + +1. Log in to Pulumi: `pulumi login` +2. Create stack: `pulumi stack init dev` +3. 
Install deps and run: + + ```powershell + python -m venv venv + .\venv\Scripts\Activate.ps1 + pip install -r requirements.txt + pulumi preview + pulumi up + ``` + +4. SSH to VM: + + ```powershell + ssh ubuntu@$(pulumi stack output public_ip) + ``` + +## Cleanup + +```bash +pulumi destroy +``` diff --git a/lab4c/pulumi/__main__.py b/lab4c/pulumi/__main__.py new file mode 100644 index 0000000000..54482de1c3 --- /dev/null +++ b/lab4c/pulumi/__main__.py @@ -0,0 +1,102 @@ +"""Lab 4 - Create VM on Yandex Cloud (same as Terraform).""" +import os +import pulumi + +config = pulumi.Config() +key_file = config.get("yandex_service_account_key_file") +if key_file: + os.environ["YANDEX_SERVICE_ACCOUNT_KEY_FILE"] = key_file + +import pulumi_yandex as yandex + +folder_id = config.require("folder_id") +zone = config.get("zone") or "ru-central1-a" +ssh_public_key = config.require("ssh_public_key") +ssh_cidr = config.require("ssh_cidr") + +# Ubuntu 22.04 LTS +image = yandex.get_compute_image(family="ubuntu-2204-lts") + +network = yandex.VpcNetwork( + "lab4c-network", + name="lab4c-network", + folder_id=folder_id, +) + +subnet = yandex.VpcSubnet( + "lab4c-subnet", + name="lab4c-subnet", + network_id=network.id, + zone=zone, + folder_id=folder_id, + v4_cidr_blocks=["10.0.1.0/24"], +) + +sg = yandex.VpcSecurityGroup( + "lab4c-vm-sg", + name="lab4c-vm-sg", + network_id=network.id, + folder_id=folder_id, + description="Allow SSH, HTTP, and port 5000 for Lab 4", + ingresses=[ + yandex.VpcSecurityGroupIngressArgs( + description="SSH", + port=22, + protocol="TCP", + v4_cidr_blocks=[ssh_cidr], + ), + yandex.VpcSecurityGroupIngressArgs( + description="HTTP", + port=80, + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + ), + yandex.VpcSecurityGroupIngressArgs( + description="App 5000", + port=5000, + protocol="TCP", + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], + egresses=[ + yandex.VpcSecurityGroupEgressArgs( + description="Any", + protocol="ANY", + v4_cidr_blocks=["0.0.0.0/0"], + ), + ], +) + 
+vm = yandex.ComputeInstance( + "lab4c-vm", + name="lab4c-vm", + zone=zone, + folder_id=folder_id, + platform_id="standard-v3", + resources=yandex.ComputeInstanceResourcesArgs( + cores=2, + memory=1, + core_fraction=20, + ), + boot_disk=yandex.ComputeInstanceBootDiskArgs( + initialize_params=yandex.ComputeInstanceBootDiskInitializeParamsArgs( + image_id=image.id, + size=10, + type="network-hdd", + ), + ), + network_interfaces=[ + yandex.ComputeInstanceNetworkInterfaceArgs( + subnet_id=subnet.id, + nat=True, + security_group_ids=[sg.id], + ), + ], + metadata={ + "ssh-keys": f"ubuntu:{ssh_public_key}", + }, + labels={"lab": "lab04"}, +) + +pulumi.export("public_ip", vm.network_interfaces[0].nat_ip_address) +pulumi.export("ssh_command", pulumi.Output.concat("ssh ubuntu@", vm.network_interfaces[0].nat_ip_address)) diff --git a/lab4c/pulumi/requirements.txt b/lab4c/pulumi/requirements.txt new file mode 100644 index 0000000000..c2955fc5a6 --- /dev/null +++ b/lab4c/pulumi/requirements.txt @@ -0,0 +1,3 @@ +pulumi>=3.0.0 +pulumi-yandex>=0.13.0 +setuptools>=65.0.0,<82 diff --git a/lab4c/terraform/.gitignore b/lab4c/terraform/.gitignore new file mode 100644 index 0000000000..df419991b7 --- /dev/null +++ b/lab4c/terraform/.gitignore @@ -0,0 +1,21 @@ +# Terraform +*.tfstate +*.tfstate.* +.terraform/ +.terraform.lock.hcl +terraform.tfvars +*.tfvars +*.tfvars.json +crash.log +override.tf +override.tf.json +*_override.tf +*_override.tf.json + +# Credentials +*.pem +*.key +*.json +!package.json +credentials +.env diff --git a/lab4c/terraform/.tflint.hcl b/lab4c/terraform/.tflint.hcl new file mode 100644 index 0000000000..75d15f14aa --- /dev/null +++ b/lab4c/terraform/.tflint.hcl @@ -0,0 +1,3 @@ +plugin "terraform" { + enabled = true +} diff --git a/lab4c/terraform/README.md b/lab4c/terraform/README.md new file mode 100644 index 0000000000..aee3cb8444 --- /dev/null +++ b/lab4c/terraform/README.md @@ -0,0 +1,59 @@ +# Lab 4 — Terraform (Yandex Cloud) + +Creates one VM (free tier: 2 
cores 20%, 1 GB RAM, 10 GB disk), VPC, subnet, security group (SSH, HTTP, 5000), and outputs public IP. + +## Prerequisites + +- Terraform 1.9+ +- Yandex Cloud account +- SSH key pair on your machine (e.g. `ssh-keygen`); you will use the **public** key path in Terraform + +## Authentication + +Use one of these (do not commit secrets): + +1. **OAuth token (quick):** + `set YANDEX_TOKEN=your_oauth_token` (cmd) or `$env:YANDEX_TOKEN = "..."` (PowerShell) + +2. **Service account key file:** + Create a service account in Yandex Cloud Console, create an authorized key (JSON), then: + `set YANDEX_SERVICE_ACCOUNT_KEY_FILE=C:\path\to\key.json` + or in `terraform.tfvars`: `yandex_token` (prefer env vars). + +3. **Folder ID:** + In Console: Cloud → folder → copy ID. Set in `terraform.tfvars` as `yandex_folder_id`. + +## Setup + +1. Copy and edit variables: + - **Windows:** `copy terraform.tfvars.example terraform.tfvars` + - **Linux/macOS:** `cp terraform.tfvars.example terraform.tfvars` + Edit: + - `yandex_folder_id` — your folder ID + - `yandex_zone` — e.g. `ru-central1-a` + - `ssh_public_key_path` — full path to your `.pub` file (e.g. `C:\Users\You\.ssh\id_rsa.pub` or `%USERPROFILE%\.ssh\id_rsa.pub`) + - `ssh_cidr` — your IP/32 (e.g. from https://ifconfig.me) + +2. Initialize and apply: + ```bash + terraform init + terraform plan + terraform apply + ``` + +3. 
SSH to VM (no `-i` needed if you use the same key as the one in metadata): + - **PowerShell:** `ssh ubuntu@$(terraform output -raw public_ip)` + - Or: `ssh -i C:\path\to\your_private_key ubuntu@` + +## Cleanup + +```bash +terraform destroy +``` + +## Files + +- `main.tf` — provider, network, subnet, security group, instance +- `variables.tf` — folder_id, zone, ssh_public_key_path, ssh_cidr +- `outputs.tf` — public_ip, ssh_command +- `terraform.tfvars` — your values (gitignored) diff --git a/lab4c/terraform/main.tf b/lab4c/terraform/main.tf new file mode 100644 index 0000000000..5152f76f06 --- /dev/null +++ b/lab4c/terraform/main.tf @@ -0,0 +1,101 @@ +terraform { + required_version = ">= 1.9" + required_providers { + yandex = { + source = "yandex-cloud/yandex" + version = "~> 0.100" + } + } +} + +provider "yandex" { + zone = var.yandex_zone + folder_id = var.yandex_folder_id + service_account_key_file = var.yandex_service_account_key_file +} + +# Ubuntu 22.04 +data "yandex_compute_image" "ubuntu" { + family = "ubuntu-2204-lts" +} + +resource "yandex_vpc_network" "lab4" { + name = "lab4c-network" +} + +resource "yandex_vpc_subnet" "lab4" { + name = "lab4c-subnet" + network_id = yandex_vpc_network.lab4.id + zone = var.yandex_zone + v4_cidr_blocks = ["10.0.1.0/24"] +} + +resource "yandex_vpc_security_group" "lab4" { + name = "lab4c-vm-sg" + network_id = yandex_vpc_network.lab4.id + description = "Allow SSH, HTTP, and port 5000 for Lab 4" + + ingress { + description = "SSH" + port = 22 + protocol = "TCP" + v4_cidr_blocks = [var.ssh_cidr] + } + + ingress { + description = "HTTP" + port = 80 + protocol = "TCP" + v4_cidr_blocks = ["0.0.0.0/0"] + } + + ingress { + description = "App 5000" + port = 5000 + protocol = "TCP" + v4_cidr_blocks = ["0.0.0.0/0"] + } + + egress { + description = "Any" + from_port = 0 + to_port = 65535 + protocol = "ANY" + v4_cidr_blocks = ["0.0.0.0/0"] + } +} + +resource "yandex_compute_instance" "lab4" { + name = "lab4c-vm" + platform_id = 
"standard-v3" + zone = var.yandex_zone + folder_id = var.yandex_folder_id + + resources { + cores = 2 + memory = 1 + core_fraction = 20 + } + + boot_disk { + initialize_params { + image_id = data.yandex_compute_image.ubuntu.id + size = 10 + type = "network-hdd" + } + } + + network_interface { + subnet_id = yandex_vpc_subnet.lab4.id + nat = true + security_group_ids = [yandex_vpc_security_group.lab4.id] + } + + metadata = { + ssh-keys = "ubuntu:${file(var.ssh_public_key_path)}" + } + + labels = { + lab = "lab04" + } +} diff --git a/lab4c/terraform/outputs.tf b/lab4c/terraform/outputs.tf new file mode 100644 index 0000000000..2821ecd1bc --- /dev/null +++ b/lab4c/terraform/outputs.tf @@ -0,0 +1,9 @@ +output "public_ip" { + description = "Public IP of the VM" + value = yandex_compute_instance.lab4.network_interface[0].nat_ip_address +} + +output "ssh_command" { + description = "Example SSH command" + value = "ssh ubuntu@${yandex_compute_instance.lab4.network_interface[0].nat_ip_address}" +} diff --git a/lab4c/terraform/terraform.tfvars.example b/lab4c/terraform/terraform.tfvars.example new file mode 100644 index 0000000000..f36767e4d3 --- /dev/null +++ b/lab4c/terraform/terraform.tfvars.example @@ -0,0 +1,5 @@ +yandex_folder_id = +yandex_zone = +yandex_service_account_key_file = +ssh_public_key_path = +ssh_cidr = "1.2.3.4/32" diff --git a/lab4c/terraform/variables.tf b/lab4c/terraform/variables.tf new file mode 100644 index 0000000000..0d121ae84c --- /dev/null +++ b/lab4c/terraform/variables.tf @@ -0,0 +1,26 @@ +variable "yandex_folder_id" { + description = "Yandex Cloud folder ID" + type = string +} + +variable "yandex_zone" { + description = "Yandex Cloud zone" + type = string + default = "ru-central1-a" +} + +variable "ssh_public_key_path" { + description = "Path to your SSH public key file" + type = string +} + +variable "ssh_cidr" { + description = "CIDR allowed for SSH" + type = string +} + +variable "yandex_service_account_key_file" { + description = "Path to 
Yandex service account JSON key" + type = string + default = null +} diff --git a/lab5c/README.md b/lab5c/README.md new file mode 100644 index 0000000000..b59d1ce2cc --- /dev/null +++ b/lab5c/README.md @@ -0,0 +1,38 @@ +# Lab 05 Completion (`lab5c`) + + +## Structure + +- `ansible/ansible.cfg` - project configuration +- `ansible/inventory/hosts.ini` - static inventory template +- `ansible/roles/common` - base system setup role +- `ansible/roles/docker` - Docker installation role +- `ansible/roles/app_deploy` - app deployment role +- `ansible/playbooks/provision.yml` - provisioning playbook +- `ansible/playbooks/deploy.yml` - deployment playbook +- `ansible/playbooks/site.yml` - full provision + deploy flow +- `ansible/group_vars/all.yml.example` - vault variable template +- `ansible/docs/LAB05.md` - documentation template with analysis + +## Control-Node Setup (WSL) + +```bash +sudo apt update +sudo apt install -y ansible +ansible-galaxy collection install -r requirements.yml +``` + +Bonus dynamic-inventory collection: + +```bash +ansible-galaxy collection install -r requirements-bonus.yml +``` + +## Typical Run Order + +```bash +ansible all -m ping +ansible-playbook playbooks/provision.yml +ansible-playbook playbooks/provision.yml +ansible-playbook playbooks/deploy.yml --ask-vault-pass +``` \ No newline at end of file diff --git a/lab5c/ansible/.gitignore b/lab5c/ansible/.gitignore new file mode 100644 index 0000000000..b487bb7be0 --- /dev/null +++ b/lab5c/ansible/.gitignore @@ -0,0 +1,4 @@ +.vault_pass +*.retry +inventory/*.pyc +__pycache__/ diff --git a/lab5c/ansible/ansible.cfg b/lab5c/ansible/ansible.cfg new file mode 100644 index 0000000000..2fb9889628 --- /dev/null +++ b/lab5c/ansible/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/hosts.ini +roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False +interpreter_python = auto_silent +timeout = 30 + +[privilege_escalation] +become = True +become_method = 
sudo +become_user = root diff --git a/lab5c/ansible/docs/LAB05.md b/lab5c/ansible/docs/LAB05.md new file mode 100644 index 0000000000..7db4ecb2c6 --- /dev/null +++ b/lab5c/ansible/docs/LAB05.md @@ -0,0 +1,180 @@ +# LAB05 — Ansible Fundamentals (Role-Based) + +## 1. Architecture Overview + +- **Ansible version used:** Ansible Core 2.17.8. +- **Control node:** Windows 10 + Docker Desktop (Ansible executed in container). +- **Target VM:** Ubuntu 22.04/24.04 VM from Lab 4 (cloud), connected via SSH. +- **Role structure:** Three roles are used: + - `common` - baseline OS preparation + - `docker` - Docker engine installation and service setup + - `app_deploy` - Dockerized app deployment and health verification +- **Why roles instead of one large playbook:** Roles isolate responsibilities, keep playbooks clean, and make each part reusable. + +## 2. Roles Documentation + +### Role: `common` + +- **Purpose:** Prepare the system with required base packages and timezone. +- **Variables (defaults):** + - `common_packages` - essential packages list (`curl`, `git`, `python3-pip`, etc.) + - `common_timezone` - default `UTC` +- **Handlers:** None. +- **Dependencies:** `community.general` collection (for timezone module). + +### Role: `docker` + +- **Purpose:** Install Docker from the official Docker APT repository and prepare runtime access. +- **Variables (defaults):** + - `docker_arch_map`, `docker_arch` + - `docker_packages` (`docker-ce`, `docker-ce-cli`, `containerd.io`, plugins) + - `docker_python_package` (`python3-docker`) + - `docker_user` (user added to `docker` group) +- **Handlers:** + - `restart docker` - restarts Docker service when package changes require it. +- **Dependencies:** Uses Ansible built-in modules; installs `python3-docker` on target host for Docker-related modules. + +### Role: `app_deploy` + +- **Purpose:** Authenticate to Docker Hub, pull image, replace container, and verify app health. 
+- **Variables (defaults):** + - `app_name`, `docker_image`, `docker_image_tag` + - `app_port`, `app_container_name` + - `app_restart_policy`, `app_env` + - `app_health_path`, `app_wait_timeout` + - Vaulted vars: `dockerhub_username`, `dockerhub_password` +- **Handlers:** + - `restart app container` - restarts running container when deployment task triggers notify. +- **Dependencies:** `community.docker` collection. + +## 3. Idempotency Demonstration + +### First run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] changed +TASK [common : Install common packages] changed +TASK [common : Configure timezone] changed +TASK [docker : Download Docker official GPG key] changed +TASK [docker : Add Docker APT repository] changed +TASK [docker : Update apt cache after Docker repo changes] changed +TASK [docker : Install Docker engine and CLI packages] changed +TASK [docker : Add target user to docker group] changed +RUNNING HANDLER [docker : restart docker] changed +... +PLAY RECAP +lab5-vm : ok=14 changed=9 unreachable=0 failed=0 skipped=0 +``` + +### Second run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] ok +TASK [common : Install common packages] ok +TASK [common : Configure timezone] ok +TASK [docker : Download Docker official GPG key] ok +TASK [docker : Add Docker APT repository] ok +TASK [docker : Install Docker engine and CLI packages] ok +TASK [docker : Add target user to docker group] ok +TASK [docker : Update apt cache after Docker repo changes] skipping +... +PLAY RECAP +lab5-vm : ok=12 changed=0 unreachable=0 failed=0 skipped=1 +``` + +### Analysis + +- On the first run, resources are created/configured to match desired state (packages, repo, Docker service, group membership). +- On the second run, Ansible modules compare desired and current state and skip unnecessary changes, proving idempotent behavior. 
+- Idempotency is achieved by stateful modules (`apt`, `service`, `user`, `docker_container`) instead of ad-hoc shell commands. + +## 4. Ansible Vault Usage + +- Credentials are stored in `group_vars/all.yml` encrypted via Ansible Vault. +- Vault password is entered interactively (`--ask-vault-pass`) or provided via local password file that is ignored by Git. +- Tasks containing credentials use `no_log: true` to prevent secret leakage in logs. + +### Encrypted file proof + +```text +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +``` + +### Why Vault is important + +- Secrets can be committed safely only in encrypted form. +- Team members can share infrastructure code without exposing credentials. +- It reduces accidental secret leakage in repo history and CI logs. + +## 5. Deployment Verification + +### Deployment run (`playbooks/deploy.yml`) + +```text +PLAY [Deploy application] +... +TASK [app_deploy : Log in to Docker Hub] changed +TASK [app_deploy : Pull application image] changed +TASK [app_deploy : Run application container] changed +TASK [app_deploy : Wait for app port to be ready] ok +TASK [app_deploy : Verify health endpoint] ok +RUNNING HANDLER [app_deploy : restart app container] changed +... 
+PLAY RECAP +lab5-vm : ok=8 changed=4 unreachable=0 failed=0 skipped=2 +``` + +### Container status + +```text +lab5-vm | CHANGED | rc=0 >> +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +e45f2bb4472d tsixphoenix/devops-info-python:latest "python app.py" 58 seconds ago Up 49 seconds 0.0.0.0:5000->5000/tcp devops-info-python +``` + +### Health check + +```text +curl http://89.169.158.161:5000/health +{"status":"healthy","timestamp":"2026-02-24T11:09:07.680263Z","uptime_seconds":14} + +curl http://89.169.158.161:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"e45f2bb4472d","platform":"Linux","platform_version":"5.15.0-170-generic","architecture":"x86_64","cpu_count":2,"python_version":"3.13.12"},"runtime":{"uptime_seconds":16,"uptime_human":"0 hours, 0 minutes","current_time":"2026-02-24T11:09:09.533021Z","timezone":"UTC"},"request":{"client_ip":"188.130.155.186","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +``` + +### Handler execution + +- Docker role handler: executed when Docker package changes require service restart. +- App deploy handler: executes only when container deployment task reports changes. + +## 6. Key Decisions + +- **Why use roles instead of plain playbooks?** + Roles separate concerns and keep top-level playbooks minimal. This reduces complexity and improves readability as automation grows. + +- **How do roles improve reusability?** + Roles encapsulate tasks + defaults + handlers. The same role can be reused across environments by changing only inventory and variables. + +- **What makes a task idempotent?** + Idempotent tasks declare target state (for example, `state: present`, `state: started`) and change only when current state differs. 
+ +- **How do handlers improve efficiency?** + Handlers run only when notified by changed tasks, so expensive operations (like restarts) are not executed on every run. + +- **Why is Ansible Vault necessary?** + It allows secure storage of credentials in versioned infrastructure code without exposing plaintext secrets. + +## 7. Challenges + +- Initial control-node setup on Windows (Ansible-in-Docker + mounted SSH key permissions). +- Correctly configuring Docker repository and architecture mapping. +- Verifying no secret values appear in output logs. + +--- \ No newline at end of file diff --git a/lab5c/ansible/group_vars/all.yml b/lab5c/ansible/group_vars/all.yml new file mode 100644 index 0000000000..5f415c4704 --- /dev/null +++ b/lab5c/ansible/group_vars/all.yml @@ -0,0 +1,23 @@ +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +3862636235616563310a373334663339636438663966653834356330663464633263613633326130 +34336536353233343036643965633262613162366332373436636537616131353730303334666438 +34326635656361623062326333666333393430316566383132656436643535623363346663333366 +39376364653165376138383561623036373133616130366661313764383837396432303631336565 +36636631383963623537333836303430313431373335653534333064393033373861636332316339 +36383730633662396633336664633138643935363637383934326331366366653139333462656161 +37646535653066616161663836336561396264326336313935643163323164346634316634363036 +64383130616332323630303561313566373461376531643732366334616562616431386364643561 +35383362633536326434376639363531346362336666393334636337316262303763326333343762 +30373635633762623431333335663232616335666332353665326263636362323934393135336435 +65323534333033616538373964386336663637633935366137356363383135336238393637336430 
+61363661366261653634383934393430336361376166666261303935356337343234306330303462 +37326236393832376461653865356265393463326362333635653532633161326235336566316436 +34373436313533636333306437393966656536396435326666356536373763356132613263613038 +39353530393937363161656264663436313934373832623262633865363538313434303661633362 +36653233643231323066343639666630303632393333323966633437633762306535643436616131 +39383433393430303536343565303362616431666137613234663330336438323937356265666438 +38396130356666333032613834326637353230343235303031303363386137323736643466333963 +3065646533393438336638646163633461373432356339353831 diff --git a/lab5c/ansible/group_vars/all.yml.example b/lab5c/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..aa703de75b --- /dev/null +++ b/lab5c/ansible/group_vars/all.yml.example @@ -0,0 +1,19 @@ +--- +# Copy this file to group_vars/all.yml and encrypt it with: +# ansible-vault encrypt group_vars/all.yml + +# Docker Hub credentials +dockerhub_username: "DOCKERHUB_USERNAME" +dockerhub_password: "DOCKERHUB_ACCESS_TOKEN" + +# Application config +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 diff --git a/lab5c/ansible/inventory/hosts.ini b/lab5c/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..3bb6bcd055 --- /dev/null +++ b/lab5c/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +lab5-vm ansible_host=89.169.158.161 ansible_user=ubuntu ansible_ssh_private_key_file=/ssh/id_ed25519 + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/lab5c/ansible/playbooks/deploy.yml b/lab5c/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..532fb1e207 --- /dev/null +++ b/lab5c/ansible/playbooks/deploy.yml @@ -0,0 +1,10 @@ +--- +- name: Deploy 
application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - app_deploy diff --git a/lab5c/ansible/playbooks/provision.yml b/lab5c/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/lab5c/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/lab5c/ansible/playbooks/site.yml b/lab5c/ansible/playbooks/site.yml new file mode 100644 index 0000000000..5621849987 --- /dev/null +++ b/lab5c/ansible/playbooks/site.yml @@ -0,0 +1,12 @@ +--- +- name: Provision and deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - common + - docker + - app_deploy diff --git a/lab5c/ansible/requirements.yml b/lab5c/ansible/requirements.yml new file mode 100644 index 0000000000..b869f415df --- /dev/null +++ b/lab5c/ansible/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: community.docker + - name: community.general diff --git a/lab5c/ansible/roles/app_deploy/defaults/main.yml b/lab5c/ansible/roles/app_deploy/defaults/main.yml new file mode 100644 index 0000000000..7d7997a13a --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/defaults/main.yml @@ -0,0 +1,11 @@ +--- +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 diff --git a/lab5c/ansible/roles/app_deploy/handlers/main.yml b/lab5c/ansible/roles/app_deploy/handlers/main.yml new file mode 100644 index 0000000000..1fc3fba48b --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/handlers/main.yml @@ -0,0 +1,6 @@ +--- +- name: restart app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: started + restart: true diff 
--git a/lab5c/ansible/roles/app_deploy/tasks/main.yml b/lab5c/ansible/roles/app_deploy/tasks/main.yml new file mode 100644 index 0000000000..9567528545 --- /dev/null +++ b/lab5c/ansible/roles/app_deploy/tasks/main.yml @@ -0,0 +1,62 @@ +--- +- name: Log in to Docker Hub + community.docker.docker_login: + registry_url: https://index.docker.io/v1/ + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + +- name: Pull application image + community.docker.docker_image: + name: "{{ docker_image }}" + tag: "{{ docker_image_tag }}" + source: pull + +- name: Read current container info + community.docker.docker_container_info: + name: "{{ app_container_name }}" + register: current_app_container + +- name: Stop existing app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: stopped + when: + - current_app_container.exists | default(false) + - current_app_container.container.State.Status | default("") == "running" + +- name: Remove old app container + community.docker.docker_container: + name: "{{ app_container_name }}" + state: absent + when: current_app_container.exists | default(false) + +- name: Run application container + community.docker.docker_container: + name: "{{ app_container_name }}" + image: "{{ docker_image }}:{{ docker_image_tag }}" + state: started + restart_policy: "{{ app_restart_policy }}" + published_ports: + - "{{ app_port }}:5000" + env: "{{ app_env }}" + recreate: true + notify: restart app container + +- name: Wait for app port to be ready + ansible.builtin.wait_for: + host: "127.0.0.1" + port: "{{ app_port | int }}" + delay: 2 + timeout: "{{ app_wait_timeout }}" + +- name: Verify health endpoint + ansible.builtin.uri: + url: "http://127.0.0.1:{{ app_port }}{{ app_health_path }}" + method: GET + status_code: 200 + return_content: true + register: app_health_result + retries: 5 + delay: 3 + until: app_health_result.status == 200 diff --git 
a/lab5c/ansible/roles/common/defaults/main.yml b/lab5c/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..49e2e4526d --- /dev/null +++ b/lab5c/ansible/roles/common/defaults/main.yml @@ -0,0 +1,13 @@ +--- +common_packages: + - apt-transport-https + - ca-certificates + - curl + - git + - gnupg + - htop + - lsb-release + - python3-pip + - vim + +common_timezone: "UTC" diff --git a/lab5c/ansible/roles/common/tasks/main.yml b/lab5c/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..e9085097f4 --- /dev/null +++ b/lab5c/ansible/roles/common/tasks/main.yml @@ -0,0 +1,15 @@ +--- +- name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + +- name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + +- name: Configure timezone + community.general.timezone: + name: "{{ common_timezone }}" + when: common_timezone | default("") | length > 0 diff --git a/lab5c/ansible/roles/docker/defaults/main.yml b/lab5c/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..546f4a7af0 --- /dev/null +++ b/lab5c/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,16 @@ +--- +docker_arch_map: + x86_64: amd64 + aarch64: arm64 + +docker_arch: "{{ docker_arch_map.get(ansible_architecture, 'amd64') }}" + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_python_package: python3-docker +docker_user: "{{ ansible_user | default('ubuntu') }}" diff --git a/lab5c/ansible/roles/docker/handlers/main.yml b/lab5c/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..1a5058da5e --- /dev/null +++ b/lab5c/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/lab5c/ansible/roles/docker/tasks/main.yml b/lab5c/ansible/roles/docker/tasks/main.yml new file mode 
100644 index 0000000000..bc99133596 --- /dev/null +++ b/lab5c/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,56 @@ +--- +- name: Install APT dependencies for Docker repository + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + +- name: Ensure Docker keyring directory exists + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + +- name: Download Docker official GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + +- name: Add Docker APT repository + ansible.builtin.apt_repository: + repo: "deb [arch={{ docker_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + filename: docker + state: present + register: docker_repo + +- name: Update apt cache after Docker repo changes + ansible.builtin.apt: + update_cache: true + when: docker_repo is changed + +- name: Install Docker engine and CLI packages + ansible.builtin.apt: + name: "{{ docker_packages + [docker_python_package] }}" + state: present + notify: restart docker + +- name: Ensure docker group exists + ansible.builtin.group: + name: docker + state: present + +- name: Add target user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + +- name: Ensure Docker service is enabled and running + ansible.builtin.service: + name: docker + state: started + enabled: true diff --git a/lab6c/ansible/.gitignore b/lab6c/ansible/.gitignore new file mode 100644 index 0000000000..b487bb7be0 --- /dev/null +++ b/lab6c/ansible/.gitignore @@ -0,0 +1,4 @@ +.vault_pass +*.retry +inventory/*.pyc +__pycache__/ diff --git a/lab6c/ansible/ansible.cfg b/lab6c/ansible/ansible.cfg new file mode 100644 index 0000000000..2fb9889628 --- /dev/null +++ b/lab6c/ansible/ansible.cfg @@ -0,0 +1,13 @@ +[defaults] +inventory = inventory/hosts.ini 
+roles_path = roles +host_key_checking = False +remote_user = ubuntu +retry_files_enabled = False +interpreter_python = auto_silent +timeout = 30 + +[privilege_escalation] +become = True +become_method = sudo +become_user = root diff --git a/lab6c/ansible/docs/LAB05.md b/lab6c/ansible/docs/LAB05.md new file mode 100644 index 0000000000..7db4ecb2c6 --- /dev/null +++ b/lab6c/ansible/docs/LAB05.md @@ -0,0 +1,180 @@ +# LAB05 — Ansible Fundamentals (Role-Based) + +## 1. Architecture Overview + +- **Ansible version used:** Ansible Core 2.17.8. +- **Control node:** Windows 10 + Docker Desktop (Ansible executed in container). +- **Target VM:** Ubuntu 22.04/24.04 VM from Lab 4 (cloud), connected via SSH. +- **Role structure:** Three roles are used: + - `common` - baseline OS preparation + - `docker` - Docker engine installation and service setup + - `app_deploy` - Dockerized app deployment and health verification +- **Why roles instead of one large playbook:** Roles isolate responsibilities, keep playbooks clean, and make each part reusable. + +## 2. Roles Documentation + +### Role: `common` + +- **Purpose:** Prepare the system with required base packages and timezone. +- **Variables (defaults):** + - `common_packages` - essential packages list (`curl`, `git`, `python3-pip`, etc.) + - `common_timezone` - default `UTC` +- **Handlers:** None. +- **Dependencies:** `community.general` collection (for timezone module). + +### Role: `docker` + +- **Purpose:** Install Docker from the official Docker APT repository and prepare runtime access. +- **Variables (defaults):** + - `docker_arch_map`, `docker_arch` + - `docker_packages` (`docker-ce`, `docker-ce-cli`, `containerd.io`, plugins) + - `docker_python_package` (`python3-docker`) + - `docker_user` (user added to `docker` group) +- **Handlers:** + - `restart docker` - restarts Docker service when package changes require it. 
+- **Dependencies:** Uses Ansible built-in modules; installs `python3-docker` on target host for Docker-related modules. + +### Role: `app_deploy` + +- **Purpose:** Authenticate to Docker Hub, pull image, replace container, and verify app health. +- **Variables (defaults):** + - `app_name`, `docker_image`, `docker_image_tag` + - `app_port`, `app_container_name` + - `app_restart_policy`, `app_env` + - `app_health_path`, `app_wait_timeout` + - Vaulted vars: `dockerhub_username`, `dockerhub_password` +- **Handlers:** + - `restart app container` - restarts running container when deployment task triggers notify. +- **Dependencies:** `community.docker` collection. + +## 3. Idempotency Demonstration + +### First run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] changed +TASK [common : Install common packages] changed +TASK [common : Configure timezone] changed +TASK [docker : Download Docker official GPG key] changed +TASK [docker : Add Docker APT repository] changed +TASK [docker : Update apt cache after Docker repo changes] changed +TASK [docker : Install Docker engine and CLI packages] changed +TASK [docker : Add target user to docker group] changed +RUNNING HANDLER [docker : restart docker] changed +... +PLAY RECAP +lab5-vm : ok=14 changed=9 unreachable=0 failed=0 skipped=0 +``` + +### Second run (`playbooks/provision.yml`) + +```text +PLAY [Provision web servers] +... +TASK [common : Update apt cache] ok +TASK [common : Install common packages] ok +TASK [common : Configure timezone] ok +TASK [docker : Download Docker official GPG key] ok +TASK [docker : Add Docker APT repository] ok +TASK [docker : Install Docker engine and CLI packages] ok +TASK [docker : Add target user to docker group] ok +TASK [docker : Update apt cache after Docker repo changes] skipping +... 
+PLAY RECAP +lab5-vm : ok=12 changed=0 unreachable=0 failed=0 skipped=1 +``` + +### Analysis + +- On the first run, resources are created/configured to match desired state (packages, repo, Docker service, group membership). +- On the second run, Ansible modules compare desired and current state and skip unnecessary changes, proving idempotent behavior. +- Idempotency is achieved by stateful modules (`apt`, `service`, `user`, `docker_container`) instead of ad-hoc shell commands. + +## 4. Ansible Vault Usage + +- Credentials are stored in `group_vars/all.yml` encrypted via Ansible Vault. +- Vault password is entered interactively (`--ask-vault-pass`) or provided via local password file that is ignored by Git. +- Tasks containing credentials use `no_log: true` to prevent secret leakage in logs. + +### Encrypted file proof + +```text +$ANSIBLE_VAULT;1.1;AES256 +64383638346636396532383762376239633430663933613638326235653962353634323766343664 +3436646365333032316364663736356565616462353663310a303061333835663866303562323132 +65356163313437653263333138366561633533646662336634393333313737336439326132323666 +``` + +### Why Vault is important + +- Secrets can be committed safely only in encrypted form. +- Team members can share infrastructure code without exposing credentials. +- It reduces accidental secret leakage in repo history and CI logs. + +## 5. Deployment Verification + +### Deployment run (`playbooks/deploy.yml`) + +```text +PLAY [Deploy application] +... +TASK [app_deploy : Log in to Docker Hub] changed +TASK [app_deploy : Pull application image] changed +TASK [app_deploy : Run application container] changed +TASK [app_deploy : Wait for app port to be ready] ok +TASK [app_deploy : Verify health endpoint] ok +RUNNING HANDLER [app_deploy : restart app container] changed +... 
+PLAY RECAP +lab5-vm : ok=8 changed=4 unreachable=0 failed=0 skipped=2 +``` + +### Container status + +```text +lab5-vm | CHANGED | rc=0 >> +CONTAINER ID IMAGE COMMAND CREATED STATUS PORTS NAMES +e45f2bb4472d tsixphoenix/devops-info-python:latest "python app.py" 58 seconds ago Up 49 seconds 0.0.0.0:5000->5000/tcp devops-info-python +``` + +### Health check + +```text +curl http://89.169.158.161:5000/health +{"status":"healthy","timestamp":"2026-02-24T11:09:07.680263Z","uptime_seconds":14} + +curl http://89.169.158.161:5000/ +{"service":{"name":"devops-info-service","version":"1.0.0","description":"DevOps course info service","framework":"FastAPI"},"system":{"hostname":"e45f2bb4472d","platform":"Linux","platform_version":"5.15.0-170-generic","architecture":"x86_64","cpu_count":2,"python_version":"3.13.12"},"runtime":{"uptime_seconds":16,"uptime_human":"0 hours, 0 minutes","current_time":"2026-02-24T11:09:09.533021Z","timezone":"UTC"},"request":{"client_ip":"188.130.155.186","user_agent":"curl/8.16.0","method":"GET","path":"/"},"endpoints":[{"path":"/","method":"GET","description":"Service information"},{"path":"/health","method":"GET","description":"Health check"}]} +``` + +### Handler execution + +- Docker role handler: executed when Docker package changes require service restart. +- App deploy handler: executes only when container deployment task reports changes. + +## 6. Key Decisions + +- **Why use roles instead of plain playbooks?** + Roles separate concerns and keep top-level playbooks minimal. This reduces complexity and improves readability as automation grows. + +- **How do roles improve reusability?** + Roles encapsulate tasks + defaults + handlers. The same role can be reused across environments by changing only inventory and variables. + +- **What makes a task idempotent?** + Idempotent tasks declare target state (for example, `state: present`, `state: started`) and change only when current state differs. 
+ +- **How do handlers improve efficiency?** + Handlers run only when notified by changed tasks, so expensive operations (like restarts) are not executed on every run. + +- **Why is Ansible Vault necessary?** + It allows secure storage of credentials in versioned infrastructure code without exposing plaintext secrets. + +## 7. Challenges + +- Initial control-node setup on Windows (Ansible-in-Docker + mounted SSH key permissions). +- Correctly configuring Docker repository and architecture mapping. +- Verifying no secret values appear in output logs. + +--- \ No newline at end of file diff --git a/lab6c/ansible/docs/LAB06.md b/lab6c/ansible/docs/LAB06.md new file mode 100644 index 0000000000..1f7ff3799f --- /dev/null +++ b/lab6c/ansible/docs/LAB06.md @@ -0,0 +1,164 @@ +# Lab 6 — Advanced Ansible & CI/CD + +## 1. Overview + +This lab extends Lab 5 with: +- **Blocks and tags** in common and docker roles +- **Docker Compose** for app deployment (replacing docker run) +- **Wipe logic** (variable + tag) for clean removal +- **GitHub Actions** workflow for automated deployment + +## 2. Blocks & Tags + +### Common Role +- **packages** block: apt update + install, with rescue (retry apt on failure), always (log completion) +- **users** block: ensure sudo group +- **common** tag: entire role + +### Docker Role +- **docker_install** block: repo setup, package install; rescue (wait 10s, retry); always (ensure service enabled) +- **docker_config** block: docker group, add user +- **docker** tag: entire role + +### Web App Role +- **app_deploy**, **compose** tags: deployment tasks +- **web_app_wipe** tag: wipe tasks only + +### Execution Examples +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +ansible-playbook playbooks/provision.yml --skip-tags "common" +ansible-playbook playbooks/provision.yml --tags "packages" +ansible-playbook playbooks/provision.yml --list-tags +``` + +## 3. 
Docker Compose Migration + +- **Template:** `roles/web_app/templates/docker-compose.yml.j2` +- **Project dir:** `/opt/{{ app_name }}` +- **Role dependency:** `web_app` depends on `docker` (meta/main.yml) +- **Module:** `community.docker.docker_compose_v2` with `state: present`, `pull: always` + +## 4. Wipe Logic + +- **Variable:** `web_app_wipe: false` (default) +- **Tag:** `web_app_wipe` +- **Tasks:** `roles/web_app/tasks/wipe.yml` — compose down, remove file, remove dir + +### Scenarios +1. **Normal deploy:** `ansible-playbook playbooks/deploy.yml` — wipe skipped +2. **Wipe only:** `ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe` +3. **Clean reinstall:** `ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true"` +4. **Safety:** `--tags web_app_wipe` without variable — wipe skipped (when blocks it) + +## 5. CI/CD Integration + +- **Workflow:** `.github/workflows/ansible-deploy.yml` +- **Triggers:** push to `lab6c/ansible/**` +- **Jobs:** lint (ansible-lint), deploy (playbook + verify) +- **Secrets required:** `ANSIBLE_VAULT_PASSWORD`, `SSH_PRIVATE_KEY`, `VM_HOST`, `VM_USER` + +## 6. 
Testing Results + +### 6.1 Provision with tags +```bash +ansible-playbook playbooks/provision.yml --tags "docker" +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=9 changed=0 unreachable=0 failed=0 skipped=1 rescued=0 ignored=0 +``` + +### 6.2 List of tags +```bash +ansible-playbook playbooks/provision.yml --list-tags +``` +``` +playbook: playbooks/provision.yml + play #1 (webservers): Provision web servers TAGS: [] + TASK TAGS: [common, docker, docker_config, docker_install, packages, users] +``` + +### 6.3 Deploy +```bash +ansible-playbook playbooks/deploy.yml +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=16 changed=2 unreachable=0 failed=0 skipped=5 rescued=0 ignored=0 +``` + +### 6.4 Wipe-only +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" --tags web_app_wipe +``` +``` +TASK [web_app : Log wipe completion] ******************************************* +ok: [lab5-vm] => {"msg": "Application devops-info-python wiped successfully"} +PLAY RECAP ********************************************************************* +lab5-vm : ok=6 changed=3 unreachable=0 failed=0 skipped=0 +``` + +### 6.5 Clean reinstall +```bash +ansible-playbook playbooks/deploy.yml -e "web_app_wipe=true" +``` +``` +PLAY RECAP ********************************************************************* +lab5-vm : ok=20 changed=3 unreachable=0 failed=0 skipped=1 rescued=0 ignored=1 +``` + +### 6.6 Health check +```bash +curl http://62.84.127.190:5000/health +``` +```json +{"status":"healthy","timestamp":"2026-03-05T12:17:53.667273Z","uptime_seconds":60} +``` + +### 6.7 Idempotency (2nd deploy run) +```bash +ansible-playbook playbooks/deploy.yml +``` +Second run: `changed=0` (all `ok`, no changes). 
+ +### 6.8 Scenario 4a — safety (--tags web_app_wipe without variable) +```bash +ansible-playbook playbooks/deploy.yml --tags web_app_wipe +``` +Wipe tasks skipped (when blocks: `web_app_wipe` false by default). Deploy runs normally. + +### 6.9 GitHub Actions +Add 4 secrets and push. Include screenshot of successful workflow in report. + + +## 7. Challenges & Solutions + +- **Template `to_native` filter:** Ansible 2.16+ does not provide `to_native` in Jinja2 — replaced with `to_json`. +- **dpkg lock:** On a new VM, `unattended-upgrades` blocks apt; retry `provision` after updates complete succeeds. +- **Wipe on empty directory:** `docker_compose_v2 state: absent` fails if directory was already removed. Added `compose_dir_stat` check before `compose down`. + +## 8. Research Answers + +### Task 1 — Blocks & Tags +- **If rescue also fails?** Play will fail with error; can add `ignore_errors` or nested rescue. +- **Nested blocks?** Yes, a block can contain another block. +- **Tag inheritance?** Tags on block apply to all tasks inside. + +### Task 2 — Docker Compose +- **restart: always vs unless-stopped?** `unless-stopped` does not restart container after manual stop. +- **Compose networks vs bridge?** Compose creates named networks; bridge is the default network. +- **Vault in template?** Yes, Vault variables are available when templating. + +### Task 3 — Wipe Logic +- **Variable + tag?** Double safety: variable prevents accidental wipe; tag enables selective execution. +- **never tag vs our approach?** `never` disables task by tag; our approach requires both tag and variable. +- **Wipe before deploy?** Enables clean reinstall: wipe → deploy in one run. +- **Clean reinstall vs rolling update?** Reinstall = full replacement; rolling = phased update without downtime. +- **Extending wipe?** Can add `docker image prune` and `docker volume rm` to wipe.yml. + +### Task 4 — CI/CD +- **SSH keys in Secrets?** Use short-lived keys; regular rotation; restrict scope. 
+- **Staging → production?** Separate inventory/playbooks, approval before prod, or environment protection. +- **Rollbacks?** Add tag/version to image, keep previous config, workflow for rollback. +- **Self-hosted vs GitHub-hosted?** Self-hosted gives direct network/VMs access; fewer SSH key exposure risks. diff --git a/lab6c/ansible/docs/workflow.jpg b/lab6c/ansible/docs/workflow.jpg new file mode 100644 index 0000000000..bd8ac1af34 Binary files /dev/null and b/lab6c/ansible/docs/workflow.jpg differ diff --git a/lab6c/ansible/group_vars/all.yml b/lab6c/ansible/group_vars/all.yml new file mode 100644 index 0000000000..e12b23ba74 --- /dev/null +++ b/lab6c/ansible/group_vars/all.yml @@ -0,0 +1,33 @@ +$ANSIBLE_VAULT;1.1;AES256 +61633462363861316436623662633839646361313433333434316236643637393039656137333630 +3638333365396330616461636436633866333531633930630a383464376530653839613930383661 +31313332303966653138366435366666353838316232633836393534646261333437396530386164 +6138376139346638330a613032643265376438343935356238613235343434356638653963316365 +65626665313563383639323930346438363239386565626434656337383430343732333962396335 +61613535636538626165313333613464633935343361353163353366333966653665383832373838 +61356436323939613636616635333836346330323531623037353736366462393336356633616132 +39343936353837316132373338616633353834333065303536326439366530666539336561303036 +64383632313331613932363934313264646464336635343535363631333031653664326530653038 +39356366366237396366386637313939306637373438366162303962386536633936626130633433 +63393363613863333965316537383439623732303862386238306637343136396634626639666335 +31363333356239303631306432656265306135643238656366346635663637666465303761653162 +66326363643065343062646634366636636166336136363862616630363030636533623861626132 +63633232373638623331323231323366326236316331663961656236666237393361653533343435 +38323333363938363237613432353362396330333961326332383634396333663336613665396637 
+37383366643764363438616536323463303634396139363837343335366662653835373630303131 +66393063373339323630393238306638663335623232303239613831353932376465663834663736 +37396661323332303061633538393035356531623065396634633433623862666538356538343761 +61653630346237656663356462343366653163326261663138303132373932323863663566393932 +34653132623136633734663664356631663839363533353631373162306339653938636161633939 +62306639396634646366316662306533663337376262666333653431383562656138643264376437 +66376630326437353766613733333434333861613830303366663133363733656561393666613364 +65656636316663666438323635343062626138393963303837393536353466626161353531313733 +37373866363435303436346339393566656431326233333336343636303638313632363466653963 +32353963356431383433396461653034353963346462653066356662396462383432363231633739 +64646563306534323565396263623236356130643234313239393232366633333034383466653438 +37313138363764306561343364393838353963373464633864356666376536383131626638333332 +35333538306161633465663966663464643032343665393438366538623666346263333839393532 +61393132313662346266346234393766616532356638663432626236363238303063666135626663 +35346434346632653164646530323833656433386465313037653231336365363739336661636163 +33346463303439383837376363343430333161396431653538313466323563343964363238333132 +35303738346436393766 diff --git a/lab6c/ansible/group_vars/all.yml.example b/lab6c/ansible/group_vars/all.yml.example new file mode 100644 index 0000000000..335be5e489 --- /dev/null +++ b/lab6c/ansible/group_vars/all.yml.example @@ -0,0 +1,22 @@ +--- +# Copy to group_vars/all.yml and encrypt: ansible-vault encrypt group_vars/all.yml + +# Docker Hub (required for deploy) +dockerhub_username: "YOUR_DOCKERHUB_USERNAME" +dockerhub_password: "YOUR_DOCKERHUB_ACCESS_TOKEN" + +# Application config (Lab 6 Docker Compose) +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_internal_port: 5000 
+app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 + +# Docker Compose project directory on target VM +compose_project_dir: "/opt/{{ app_name }}" diff --git a/lab6c/ansible/inventory/hosts.ini b/lab6c/ansible/inventory/hosts.ini new file mode 100644 index 0000000000..b0c44a8fd4 --- /dev/null +++ b/lab6c/ansible/inventory/hosts.ini @@ -0,0 +1,5 @@ +[webservers] +lab5-vm ansible_host=62.84.127.190 ansible_user=ubuntu ansible_ssh_private_key_file=/ssh/id_ed25519 + +[webservers:vars] +ansible_python_interpreter=/usr/bin/python3 diff --git a/lab6c/ansible/playbooks/deploy.yml b/lab6c/ansible/playbooks/deploy.yml new file mode 100644 index 0000000000..f3923b77bb --- /dev/null +++ b/lab6c/ansible/playbooks/deploy.yml @@ -0,0 +1,10 @@ +--- +- name: Deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - web_app diff --git a/lab6c/ansible/playbooks/provision.yml b/lab6c/ansible/playbooks/provision.yml new file mode 100644 index 0000000000..7cc2e6678d --- /dev/null +++ b/lab6c/ansible/playbooks/provision.yml @@ -0,0 +1,8 @@ +--- +- name: Provision web servers + hosts: webservers + become: true + + roles: + - common + - docker diff --git a/lab6c/ansible/playbooks/site.yml b/lab6c/ansible/playbooks/site.yml new file mode 100644 index 0000000000..f90334eff7 --- /dev/null +++ b/lab6c/ansible/playbooks/site.yml @@ -0,0 +1,12 @@ +--- +- name: Provision and deploy application + hosts: webservers + become: true + + vars_files: + - ../group_vars/all.yml + + roles: + - common + - docker + - web_app diff --git a/lab6c/ansible/requirements.yml b/lab6c/ansible/requirements.yml new file mode 100644 index 0000000000..b869f415df --- /dev/null +++ b/lab6c/ansible/requirements.yml @@ -0,0 +1,4 @@ +--- +collections: + - name: community.docker + - name: community.general diff --git a/lab6c/ansible/roles/common/defaults/main.yml 
b/lab6c/ansible/roles/common/defaults/main.yml new file mode 100644 index 0000000000..49e2e4526d --- /dev/null +++ b/lab6c/ansible/roles/common/defaults/main.yml @@ -0,0 +1,13 @@ +--- +common_packages: + - apt-transport-https + - ca-certificates + - curl + - git + - gnupg + - htop + - lsb-release + - python3-pip + - vim + +common_timezone: "UTC" diff --git a/lab6c/ansible/roles/common/tasks/main.yml b/lab6c/ansible/roles/common/tasks/main.yml new file mode 100644 index 0000000000..bb93353eee --- /dev/null +++ b/lab6c/ansible/roles/common/tasks/main.yml @@ -0,0 +1,58 @@ +--- +# Common role: baseline system setup +# Tags: packages, users, common + +- name: Install packages + block: + - name: Update apt cache + ansible.builtin.apt: + update_cache: true + cache_valid_time: 3600 + tags: + - packages + - common + + - name: Install common packages + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + tags: + - packages + - common + + rescue: + - name: Retry apt update on failure + ansible.builtin.apt: + update_cache: true + ignore_errors: true + + - name: Re-run package install after cache fix + ansible.builtin.apt: + name: "{{ common_packages }}" + state: present + + always: + - name: Log common role completion + ansible.builtin.copy: + content: "common role completed at {{ ansible_date_time.iso8601 }}\n" + dest: /tmp/ansible_common_completed + mode: "0644" + tags: + - common + +- name: User and group setup + block: + - name: Ensure sudo group exists + ansible.builtin.group: + name: sudo + state: present + tags: + - users + - common + +- name: Configure timezone + community.general.timezone: + name: "{{ common_timezone }}" + when: common_timezone | default("") | length > 0 + tags: + - common diff --git a/lab6c/ansible/roles/docker/defaults/main.yml b/lab6c/ansible/roles/docker/defaults/main.yml new file mode 100644 index 0000000000..546f4a7af0 --- /dev/null +++ b/lab6c/ansible/roles/docker/defaults/main.yml @@ -0,0 +1,16 @@ +--- +docker_arch_map: + 
x86_64: amd64 + aarch64: arm64 + +docker_arch: "{{ docker_arch_map.get(ansible_architecture, 'amd64') }}" + +docker_packages: + - docker-ce + - docker-ce-cli + - containerd.io + - docker-buildx-plugin + - docker-compose-plugin + +docker_python_package: python3-docker +docker_user: "{{ ansible_user | default('ubuntu') }}" diff --git a/lab6c/ansible/roles/docker/handlers/main.yml b/lab6c/ansible/roles/docker/handlers/main.yml new file mode 100644 index 0000000000..1a5058da5e --- /dev/null +++ b/lab6c/ansible/roles/docker/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart docker + ansible.builtin.service: + name: docker + state: restarted diff --git a/lab6c/ansible/roles/docker/tasks/main.yml b/lab6c/ansible/roles/docker/tasks/main.yml new file mode 100644 index 0000000000..990a1dc49c --- /dev/null +++ b/lab6c/ansible/roles/docker/tasks/main.yml @@ -0,0 +1,105 @@ +--- +# Docker role: install Docker engine from official repo +# Tags: docker_install, docker_config, docker + +- name: Docker installation + block: + - name: Install APT dependencies for Docker repository + ansible.builtin.apt: + name: + - ca-certificates + - curl + - gnupg + state: present + update_cache: true + tags: + - docker_install + - docker + + - name: Ensure Docker keyring directory exists + ansible.builtin.file: + path: /etc/apt/keyrings + state: directory + mode: "0755" + tags: + - docker_install + - docker + + - name: Download Docker official GPG key + ansible.builtin.get_url: + url: https://download.docker.com/linux/ubuntu/gpg + dest: /etc/apt/keyrings/docker.asc + mode: "0644" + tags: + - docker_install + - docker + + - name: Add Docker APT repository + ansible.builtin.apt_repository: + repo: "deb [arch={{ docker_arch }} signed-by=/etc/apt/keyrings/docker.asc] https://download.docker.com/linux/ubuntu {{ ansible_distribution_release }} stable" + filename: docker + state: present + register: docker_repo + tags: + - docker_install + - docker + + - name: Update apt cache after Docker repo 
changes + ansible.builtin.apt: + update_cache: true + when: docker_repo is changed + tags: + - docker_install + - docker + + - name: Install Docker engine and CLI packages + ansible.builtin.apt: + name: "{{ docker_packages + [docker_python_package] }}" + state: present + notify: restart docker + tags: + - docker_install + - docker + + rescue: + - name: Wait before retry after GPG/repo failure + ansible.builtin.pause: + seconds: 10 + prompt: "Retrying Docker repo setup..." + + - name: Retry apt update + ansible.builtin.apt: + update_cache: true + + always: + - name: Ensure Docker service is enabled and running + ansible.builtin.service: + name: docker + state: started + enabled: true + tags: + - docker + - docker_config + +- name: Docker configuration + block: + - name: Ensure docker group exists + ansible.builtin.group: + name: docker + state: present + tags: + - docker_config + - docker + + - name: Add target user to docker group + ansible.builtin.user: + name: "{{ docker_user }}" + groups: docker + append: true + tags: + - docker_config + - docker + + tags: + - docker_config + - docker diff --git a/lab6c/ansible/roles/web_app/defaults/main.yml b/lab6c/ansible/roles/web_app/defaults/main.yml new file mode 100644 index 0000000000..0e150a57fe --- /dev/null +++ b/lab6c/ansible/roles/web_app/defaults/main.yml @@ -0,0 +1,19 @@ +--- +# Application config +app_name: "devops-info-python" +docker_image: "{{ dockerhub_username }}/{{ app_name }}" +docker_image_tag: "latest" + +app_port: 5000 +app_internal_port: 5000 +app_container_name: "{{ app_name }}" +app_restart_policy: "unless-stopped" +app_env: {} +app_health_path: "/health" +app_wait_timeout: 120 + +# Docker Compose +compose_project_dir: "/opt/{{ app_name }}" + +# Wipe logic: set to true to remove app completely. Also requires --tags web_app_wipe for wipe-only. 
+web_app_wipe: false diff --git a/lab6c/ansible/roles/web_app/handlers/main.yml b/lab6c/ansible/roles/web_app/handlers/main.yml new file mode 100644 index 0000000000..f8bfe8ed4f --- /dev/null +++ b/lab6c/ansible/roles/web_app/handlers/main.yml @@ -0,0 +1,5 @@ +--- +- name: restart app container + community.docker.docker_compose_v2: + project_src: "{{ compose_project_dir }}" + state: present diff --git a/lab6c/ansible/roles/web_app/meta/main.yml b/lab6c/ansible/roles/web_app/meta/main.yml new file mode 100644 index 0000000000..a5177c0f80 --- /dev/null +++ b/lab6c/ansible/roles/web_app/meta/main.yml @@ -0,0 +1,4 @@ +--- +# web_app depends on docker - Docker must be installed before deploying containers +dependencies: + - role: docker diff --git a/lab6c/ansible/roles/web_app/tasks/main.yml b/lab6c/ansible/roles/web_app/tasks/main.yml new file mode 100644 index 0000000000..063676e3cf --- /dev/null +++ b/lab6c/ansible/roles/web_app/tasks/main.yml @@ -0,0 +1,84 @@ +--- +# web_app role: deploy containerized app with Docker Compose +# Tags: app_deploy, compose, web_app +# Dependency: docker role (installed automatically via meta/main.yml) + +# Wipe logic runs first when explicitly requested +- name: Include wipe tasks + include_tasks: wipe.yml + tags: + - web_app_wipe + +# Deployment block +- name: Deploy application with Docker Compose + block: + - name: Log in to Docker Hub + community.docker.docker_login: + registry_url: https://index.docker.io/v1/ + username: "{{ dockerhub_username }}" + password: "{{ dockerhub_password }}" + no_log: true + tags: + - app_deploy + - compose + + - name: Create application directory + ansible.builtin.file: + path: "{{ compose_project_dir }}" + state: directory + mode: "0755" + tags: + - app_deploy + - compose + + - name: Template docker-compose.yml + ansible.builtin.template: + src: docker-compose.yml.j2 + dest: "{{ compose_project_dir }}/docker-compose.yml" + mode: "0644" + tags: + - app_deploy + - compose + + - name: Deploy with Docker 
Compose (up)
+      community.docker.docker_compose_v2:
+        project_src: "{{ compose_project_dir }}"
+        state: present
+        pull: always
+      tags:
+        - app_deploy
+        - compose
+
+    - name: Wait for app port to be ready
+      ansible.builtin.wait_for:
+        host: "127.0.0.1"
+        port: "{{ app_port | int }}"
+        delay: 2
+        timeout: "{{ app_wait_timeout }}"
+      tags:
+        - app_deploy
+        - compose
+
+    - name: Verify health endpoint
+      ansible.builtin.uri:
+        url: "http://127.0.0.1:{{ app_port }}{{ app_health_path }}"
+        method: GET
+        status_code: 200
+        return_content: true
+      register: app_health_result
+      retries: 5
+      delay: 3
+      until: app_health_result.status == 200
+      tags:
+        - app_deploy
+        - compose
+
+  rescue:
+    - name: Log deployment failure
+      ansible.builtin.debug:
+        msg: "Deployment failed, check logs above"
+
+    # Re-raise after logging: a rescued block otherwise ends "ok", so a broken
+    # deployment would be reported as success to the CI pipeline.
+    - name: Fail the play after logging the deployment error
+      ansible.builtin.fail:
+        msg: "web_app deployment failed; see the task output above"
+
+  tags:
+    - app_deploy
+    - compose
+    - web_app
diff --git a/lab6c/ansible/roles/web_app/tasks/wipe.yml b/lab6c/ansible/roles/web_app/tasks/wipe.yml
new file mode 100644
index 0000000000..fbeb835e4f
--- /dev/null
+++ b/lab6c/ansible/roles/web_app/tasks/wipe.yml
@@ -0,0 +1,41 @@
+---
+# Wipe web application: stops containers, removes compose file and app directory
+# Runs only when web_app_wipe=true AND tag web_app_wipe specified
+# Usage: ansible-playbook deploy.yml -e "web_app_wipe=true" --tags web_app_wipe
+
+- name: Check if app directory exists
+  ansible.builtin.stat:
+    path: "{{ compose_project_dir }}"
+  register: compose_dir_stat
+  # Must carry the wipe tag: under `--tags web_app_wipe` an untagged task is
+  # skipped, leaving compose_dir_stat undefined for the block below.
+  tags:
+    - web_app_wipe
+
+- name: Wipe web application
+  block:
+    - name: Stop and remove containers with Docker Compose
+      community.docker.docker_compose_v2:
+        project_src: "{{ compose_project_dir }}"
+        state: absent
+      when: compose_dir_stat.stat.isdir | default(false)
+
+    # file with state=absent is idempotent and does not fail on a missing
+    # path, so ignore_errors is not needed (ansible-lint: ignore-errors).
+    - name: Remove docker-compose file
+      ansible.builtin.file:
+        path: "{{ compose_project_dir }}/docker-compose.yml"
+        state: absent
+
+    - name: Remove application directory
+      ansible.builtin.file:
+        path: "{{ compose_project_dir }}"
+        state: absent
+
+    - name: Log wipe
completion
+      ansible.builtin.debug:
+        msg: "Application {{ app_name }} wiped successfully"
+
+  when: web_app_wipe | default(false) | bool
+  tags:
+    - web_app_wipe
diff --git a/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2 b/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2
new file mode 100644
index 0000000000..7285e18cf1
--- /dev/null
+++ b/lab6c/ansible/roles/web_app/templates/docker-compose.yml.j2
@@ -0,0 +1,16 @@
+---
+# Generated by Ansible - do not edit manually
+# Template: roles/web_app/templates/docker-compose.yml.j2
+
+services:
+  {{ app_name }}:
+    image: {{ docker_image }}:{{ docker_image_tag }}
+    container_name: {{ app_container_name }}
+    ports:
+      - "{{ app_port }}:{{ app_internal_port }}"
+    environment: {{ app_env | default({}) | to_json }}
+    restart: {{ app_restart_policy }}
+{# Control tags at column 0 so the rendered YAML gets no stray indented blank lines #}
+{% if app_extra_hosts is defined and app_extra_hosts %}
+    extra_hosts: {{ app_extra_hosts | to_json }}
+{% endif %}
diff --git a/lab7c/docker-compose.yml b/lab7c/docker-compose.yml
new file mode 100644
index 0000000000..4ad8163ebd
--- /dev/null
+++ b/lab7c/docker-compose.yml
@@ -0,0 +1,109 @@
+# NOTE: the top-level `version` key is obsolete in Compose v2 (warning only);
+# omitted here for consistency with lab8c/docker-compose.yml.
+networks:
+  logging:
+    name: logging
+
+volumes:
+  loki-data:
+  grafana-data:
+
+services:
+  loki:
+    image: grafana/loki:3.0.0
+    command: -config.file=/etc/loki/config.yml
+    ports:
+      - "3100:3100"
+    volumes:
+      - ./loki/config.yml:/etc/loki/config.yml:ro
+      - loki-data:/loki
+    networks:
+      - logging
+    healthcheck:
+      test: ["CMD-SHELL", "curl -f http://localhost:3100/ready || exit 1"]
+      interval: 10s
+      timeout: 5s
+      retries: 5
+      start_period: 10s
+    deploy:
+      resources:
+        limits:
+          cpus: "1.0"
+          memory: 1G
+        reservations:
+          cpus: "0.5"
+          memory: 512M
+
+  promtail:
+    image: grafana/promtail:3.0.0
+    command: -config.file=/etc/promtail/config.yml
+    ports:
+      - "9080:9080"
+    volumes:
+      - ./promtail/config.yml:/etc/promtail/config.yml:ro
+      - /var/log:/var/log:ro
+      - /var/lib/docker/containers:/var/lib/docker/containers:ro
+      - 
/var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + depends_on: + - loki + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + grafana: + image: grafana/grafana:12.3.1 + ports: + - "3000:3000" + environment: + GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}" + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + depends_on: + - loki + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + app-python: + image: tsixphoenix/devops-info-python:latest + ports: + - "8000:5000" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + environment: + HOST: "0.0.0.0" + PORT: "5000" + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + diff --git a/lab7c/docs/LAB07.md b/lab7c/docs/LAB07.md new file mode 100644 index 0000000000..f898ad53f1 --- /dev/null +++ b/lab7c/docs/LAB07.md @@ -0,0 +1,197 @@ +# Lab 7 — Observability & Logging with Loki Stack + +## 1. Architecture + +- **Loki**: log storage and query engine (TSDB on filesystem, 7‑day retention). +- **Promtail**: collects container logs from Docker and ships them to Loki. +- **Grafana**: visualizes logs and dashboards using LogQL. +- **App (FastAPI)**: `devops-info-service` container, logging JSON to stdout. +- All services run in `lab7c/docker-compose.yml` on a shared `logging` network. + +## 2. 
Setup Guide
+
+### 2.1 Stack deployment
+
+```bash
+cd lab7c
+docker compose up -d
+docker compose ps
+```
+
+Services:
+- `loki` on `3100`
+- `promtail` on `9080`
+- `grafana` on `3000`
+- `app-python` on `8000` (mapped to container 5000)
+
+### 2.2 Verification
+
+```bash
+# Loki readiness
+curl http://localhost:3100/ready
+
+# Promtail targets
+curl http://localhost:9080/targets
+
+# Open Grafana (local)
+http://localhost:3000
+```
+
+In Grafana:
+1. **Connections → Data sources → Add data source → Loki**
+2. URL: `http://loki:3100`
+3. **Save & Test** → “Data source connected”
+4. Go to **Explore**, choose **Loki**, run `{job="docker"}`.
+
+## 3. Configuration
+
+### 3.1 Docker Compose (`lab7c/docker-compose.yml`)
+
+- Defines network `logging` and volumes `loki-data`, `grafana-data`.
+- **Loki**:
+  - Image `grafana/loki:3.0.0`
+  - Mounts `./loki/config.yml` to `/etc/loki/config.yml`
+  - Persists data in `loki-data:/loki`
+  - Health check on `/ready`
+  - Resource limits and reservations set.
+- **Promtail**:
+  - Image `grafana/promtail:3.0.0`
+  - Mounts `./promtail/config.yml`
+  - Mounts `/var/lib/docker/containers` and `/var/run/docker.sock` read‑only.
+- **Grafana**:
+  - Image `grafana/grafana:12.3.1`
+  - Port `3000:3000`
+  - Admin user/password via env (for dev: `admin` / `${GRAFANA_ADMIN_PASSWORD:-admin}`).
+  - Health check on `/api/health`, resource limits.
+- **app-python**:
+  - Image `tsixphoenix/devops-info-python:latest`
+  - Port `8000:5000`
+  - Labels `logging="promtail"`, `app="devops-python"` for Promtail/Loki labels.
+
+### 3.2 Loki (`lab7c/loki/config.yml`)
+
+- `auth_enabled: false` for local testing.
+- `server.http_listen_port: 3100`.
+- `common`:
+  - `path_prefix: /loki`
+  - filesystem storage for chunks and rules.
+  - in‑memory ring for a single instance.
+- `schema_config`:
+  - `store: tsdb`, `object_store: filesystem`, `schema: v13`, daily index.
+- `storage_config`:
+  - `tsdb_shipper` index in `/loki/index` with cache.
+ - filesystem chunks in `/loki/chunks`. +- `limits_config.retention_period: 168h` (7 days). +- `compactor`: + - cleans up old logs with `retention_enabled: true`. + +### 3.3 Promtail (`lab7c/promtail/config.yml`) + +- `server.http_listen_port: 9080`. +- `positions` stored in `/tmp/positions.yaml`. +- `clients` send to `http://loki:3100/loki/api/v1/push`. +- `scrape_configs` for **Docker**: + - `docker_sd_configs` on `unix:///var/run/docker.sock`. + - `relabel_configs`: + - `container` label from `__meta_docker_container_name`. + - `app` label from container label `app`. + - `logging` label from container label `logging`. + +## 4. Application Logging (JSON) + +In `lab3c/app_python/app.py`: +- Switched to **JSON log lines** using the standard `logging` module. +- HTTP middleware logs: + - `timestamp`, `level`, `service`, `method`, `path`, `status`, `client_ip`, `user_agent`. +- Logs are written to stdout and collected by Docker, then by Promtail. + +Example JSON log line: +```json +{ + "timestamp": "2026-03-05T12:20:00Z", + "level": "INFO", + "service": "devops-info-service", + "method": "GET", + "path": "/health", + "status": 200, + "client_ip": "127.0.0.1", + "user_agent": "curl/8.6.0", + "message": "request" +} +``` + +Screenshots are stored in `lab7c/docs/`. +## 5. Dashboard & LogQL + +### 5.1 Explore queries + +In Grafana Explore (Loki data source): + +- All logs for Python app: +```logql +{app="devops-python"} +``` + +- Only error logs: +```logql +{app="devops-python"} |= "ERROR" +``` + +- Parse JSON and filter by method: +```logql +{app="devops-python"} | json | method="GET" +``` + +### 5.2 Dashboard panels + +Dashboard panels created (LogQL examples): + +1. **Logs Table** (all apps): + ```logql + {app=~"devops-.*"} + ``` +2. **Request Rate** (time series): + ```logql + sum by (app) (rate({app=~"devops-.*"}[1m])) + ``` +3. **Error Logs**: + ```logql + {app=~"devops-.*"} | json | level="ERROR" + ``` +4. 
**Log Level Distribution**: + ```logql + sum by (level) (count_over_time({app=~"devops-.*"} | json [5m])) + ``` + +## 6. Production Configuration + +- **Resource limits**: all services have `deploy.resources` limits and reservations. +- **Grafana security**: + - Anonymous access disabled (`GF_AUTH_ANONYMOUS_ENABLED=false`). + - Admin credentials configured via environment variables / `.env`. +- **Health checks**: + - Loki: `/ready` endpoint. + - Grafana: `/api/health` endpoint. +- **Retention**: + - Loki configured for 7 days (`retention_period: 168h`) with compactor cleanup. + +## 7. Testing + +1. Start stack: `docker compose up -d`. +2. Generate logs: + ```bash + for i in {1..20}; do curl http://localhost:8000/; done + for i in {1..20}; do curl http://localhost:8000/health; done + ``` +3. In Grafana Explore, run: + - `{app="devops-python"}` + - `{app="devops-python"} | json | method="GET"` + - `{app="devops-python"} | json | level="ERROR"` +4. Check dashboard panels render data. + +## 8. Challenges + +- **Docker TSDB configuration**: required reading Loki 3.0 docs to use `tsdb` with filesystem correctly. +- **Docker discovery**: Promtail needed correct Docker SD and relabeling to get `app` and `container` labels. +- **JSON logging**: changing logging format without breaking existing behavior and keeping logs parseable in Loki. 
+ diff --git a/lab7c/docs/dashboard.jpg b/lab7c/docs/dashboard.jpg new file mode 100644 index 0000000000..05843cfab4 Binary files /dev/null and b/lab7c/docs/dashboard.jpg differ diff --git a/lab7c/docs/query.jpg b/lab7c/docs/query.jpg new file mode 100644 index 0000000000..ec2146e77c Binary files /dev/null and b/lab7c/docs/query.jpg differ diff --git a/lab7c/loki/config.yml b/lab7c/loki/config.yml new file mode 100644 index 0000000000..6a9219da04 --- /dev/null +++ b/lab7c/loki/config.yml @@ -0,0 +1,43 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + cache_ttl: 24h + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem + diff --git a/lab7c/promtail/config.yml b/lab7c/promtail/config.yml new file mode 100644 index 0000000000..eb99e73798 --- /dev/null +++ b/lab7c/promtail/config.yml @@ -0,0 +1,29 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + # Container name label (without leading slash) + - source_labels: [__meta_docker_container_name] + target_label: container + regex: "/(.*)" + replacement: "$1" + + # Propagate container labels as Loki labels + - source_labels: [__meta_docker_container_label_app] + target_label: 
app + - source_labels: [__meta_docker_container_label_logging] + target_label: logging + diff --git a/lab8c/docker-compose.yml b/lab8c/docker-compose.yml new file mode 100644 index 0000000000..129dc3058e --- /dev/null +++ b/lab8c/docker-compose.yml @@ -0,0 +1,142 @@ +networks: + logging: + name: logging + +volumes: + loki-data: + grafana-data: + prometheus-data: + +services: + loki: + image: grafana/loki:3.0.0 + command: -config.file=/etc/loki/config.yml + ports: + - "3100:3100" + volumes: + - ./loki/config.yml:/etc/loki/config.yml:ro + - loki-data:/loki + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3100/ready || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + start_period: 10s + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + + promtail: + image: grafana/promtail:3.0.0 + command: -config.file=/etc/promtail/config.yml + ports: + - "9080:9080" + volumes: + - ./promtail/config.yml:/etc/promtail/config.yml:ro + - /var/log:/var/log:ro + - /var/lib/docker/containers:/var/lib/docker/containers:ro + - /var/run/docker.sock:/var/run/docker.sock:ro + networks: + - logging + depends_on: + - loki + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + prometheus: + image: prom/prometheus:v3.9.0 + command: + - "--config.file=/etc/prometheus/prometheus.yml" + - "--storage.tsdb.path=/prometheus" + - "--storage.tsdb.retention.time=15d" + - "--storage.tsdb.retention.size=10GB" + - "--web.enable-lifecycle" + ports: + - "9090:9090" + volumes: + - ./prometheus/prometheus.yml:/etc/prometheus/prometheus.yml:ro + - prometheus-data:/prometheus + networks: + - logging + healthcheck: + test: ["CMD-SHELL", "wget --no-verbose --tries=1 --spider http://localhost:9090/-/healthy || exit 1"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + cpus: "1.0" + memory: 1G + reservations: + cpus: "0.5" + memory: 512M + 
+ grafana: + image: grafana/grafana:12.3.1 + ports: + - "3000:3000" + environment: + GF_AUTH_ANONYMOUS_ENABLED: "false" + GF_SECURITY_ADMIN_USER: "admin" + GF_SECURITY_ADMIN_PASSWORD: "${GRAFANA_ADMIN_PASSWORD:-admin}" + volumes: + - grafana-data:/var/lib/grafana + networks: + - logging + depends_on: + - loki + healthcheck: + test: ["CMD-SHELL", "curl -f http://localhost:3000/api/health || exit 1"] + interval: 15s + timeout: 5s + retries: 5 + start_period: 20s + deploy: + resources: + limits: + cpus: "0.5" + memory: 512M + reservations: + cpus: "0.25" + memory: 256M + + app-python: + image: tsixphoenix/devops-info-python:latest + ports: + - "8000:5000" + networks: + - logging + labels: + logging: "promtail" + app: "devops-python" + environment: + HOST: "0.0.0.0" + PORT: "5000" + healthcheck: + # python:3.13-slim has no curl; use Python + test: ["CMD", "python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health')"] + interval: 10s + timeout: 5s + retries: 5 + deploy: + resources: + limits: + cpus: "0.5" + memory: 256M + reservations: + cpus: "0.25" + memory: 128M diff --git a/lab8c/docs/LAB08.md b/lab8c/docs/LAB08.md new file mode 100644 index 0000000000..5fab199c43 --- /dev/null +++ b/lab8c/docs/LAB08.md @@ -0,0 +1,93 @@ +# Lab 8 — Metrics & Monitoring with Prometheus + +## 1. Architecture + +The app exposes metrics at `/metrics`. Prometheus scrapes that endpoint every 15 seconds, stores the time series, and Grafana queries Prometheus to draw dashboards. + +Rough flow: + +- **App** → exposes `/metrics` in Prometheus text format +- **Prometheus** → scrapes app, Loki, Grafana, and itself on the same Docker network (`logging`) +- **Grafana** → uses Prometheus as a data source (`http://prometheus:9090`) and shows RED metrics (rate, errors, duration) plus app health + +So: app and other services are scraped by Prometheus; Grafana only talks to Prometheus (and Loki for logs from Lab 7 if you use that stack too). + +## 2. 
Application instrumentation + +The Python app lives in `lab3c/app_python`. I added `prometheus-client==0.23.1` to `requirements.txt` and wired up metrics in `app.py`. + +**What’s exposed:** + +- **http_requests_total** (counter) — total requests with labels `method`, `endpoint`, `status`. Used for request rate and error rate. +- **http_request_duration_seconds** (histogram) — request duration with `method` and `endpoint`. Used for latency percentiles (e.g. p95). +- **http_requests_in_progress** (gauge) — how many requests are in flight right now. +- **devops_info_endpoint_calls** (counter) — per-endpoint usage (e.g. `/`, `/health`). +- **devops_info_system_collection_seconds** (histogram) — how long it takes to gather system info on the root endpoint. + +Paths are normalized to `/`, `/health`, `/metrics`, or `other` so we don’t blow up cardinality. A middleware records the start time, bumps the in-progress gauge, runs the handler, then records duration and status and decrements the gauge. + +The `/metrics` route just returns `generate_latest()` with the right content type so Prometheus can scrape it. + +## 3. Prometheus configuration + +Config is in `lab8c/prometheus/prometheus.yml`. + +- Global scrape interval: 15s. +- Four jobs: **prometheus** (self), **app** (`app-python:5000`, path `/metrics`), **loki** (`loki:3100`), **grafana** (`grafana:3000`). + +Retention (15d, 10GB) is set on the command line in `docker-compose.yml`, not in this file. + +## 4. Dashboard + +The custom dashboard is in `lab8c/docs/grafana-app-dashboard.json`. It has seven panels: + +1. **Request rate** — `sum(rate(http_requests_total[5m])) by (endpoint)` (requests per second per endpoint). +2. **Error rate (5xx)** — `sum(rate(http_requests_total{status=~"5.."}[5m]))`. +3. **Request duration p95** — `histogram_quantile(0.95, ...)` over the duration histogram. +4. **Active requests** — `http_requests_in_progress`. +5. 
**Status code distribution** — `sum by (status) (rate(http_requests_total[5m]))` (pie chart). +6. **Uptime (app)** — `up{job="app"}` (1 = up, 0 = down). +7. **Request duration heatmap** — `rate(http_request_duration_seconds_bucket[5m])`. + +When you import the JSON in Grafana, it will ask for a Prometheus data source; pick the one you added (URL `http://prometheus:9090`). + +## 5. PromQL examples + +- `rate(http_requests_total[5m])` — request rate over the last 5 minutes. +- `sum(rate(http_requests_total[5m])) by (endpoint)` — same, broken down by endpoint. +- `sum(rate(http_requests_total{status=~"5.."}[5m]))` — 5xx error rate (RED: errors). +- `histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le))` — 95th percentile latency in seconds (RED: duration). +- `up{job="app"}` — 1 if the app target is up, 0 if down. +- `http_requests_in_progress` — current number of requests being processed. + +## 6. Production-style setup + +In `lab8c/docker-compose.yml`: + +- **Health checks**: Prometheus uses `wget` on `/-/healthy`; the app uses `curl` on `/health`. Loki and Grafana keep their existing checks. +- **Resource limits**: Prometheus 1 CPU / 1G; Loki 1 CPU / 1G; Grafana 0.5 CPU / 512M; app 0.5 CPU / 256M. +- **Retention**: 15 days and 10GB via Prometheus command-line flags. +- **Volumes**: `prometheus-data`, `loki-data`, `grafana-data` so data survives restarts. + +## 7. Testing + +- Run the app locally from `lab3c/app_python`, then hit `http://localhost:8000/metrics` — you should see the usual Prometheus text output. +- Run the stack: `cd lab8c && docker compose up -d`. Open http://localhost:9090/targets and check that all targets (prometheus, app, loki, grafana) are UP. Run a few queries in the Prometheus UI (e.g. `up`, `rate(http_requests_total[5m])` after some traffic). +- In Grafana, add the Prometheus data source and import the dashboard from `lab8c/docs/grafana-app-dashboard.json`. 
Generate some traffic to the app and confirm the panels show data. + +Screenshots to put in `lab8c/docs/`: + +- `metrics-endpoint.jpg` — browser or terminal output of `/metrics`. +- `prometheus-targets.jpg` — Targets page with all UP. +- `prometheus-query.jpg` — e.g. result of `up` or `rate(http_requests_total[5m])`. +- `grafana-dashboard.jpg` — the custom dashboard with live data. + +## 8. Challenges and fixes + +- **Middleware order**: Metrics need the response status and duration, so the metrics middleware runs the handler first and then records counter/histogram/gauge. The logging middleware is separate and doesn’t affect the numbers. +- **Cardinality**: We only use a few endpoint labels (`/`, `/health`, `/metrics`, `other`) so we don’t get thousands of series from random paths. +- **Docker**: Prometheus config is mounted at `/etc/prometheus/prometheus.yml`. All scrape targets use service names on the `logging` network (`app-python:5000`, `loki:3100`, `grafana:3000`). + +## 9. Metrics vs logs (Lab 7) + +Logs (Loki) answer “what happened” — individual requests, errors, stack traces. Metrics (Prometheus) answer “how much” and “how often” — rates, percentiles, counts. You need both: use metrics for dashboards and alerts, and when something spikes, dig into the logs for context. 
diff --git a/lab8c/docs/grafana-app-dashboard.json b/lab8c/docs/grafana-app-dashboard.json new file mode 100644 index 0000000000..e9e2cb5c54 --- /dev/null +++ b/lab8c/docs/grafana-app-dashboard.json @@ -0,0 +1,161 @@ +{ + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 0, + "id": null, + "links": [], + "liveNow": false, + "panels": [ + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "reqps", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 0 }, + "id": 1, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum(rate(http_requests_total[5m])) by (endpoint)", + "legendFormat": "{{endpoint}}", + "refId": "A" + } + ], + "title": "Request Rate", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "reqps", "min": 0, "color": { "mode": "palette-classic" } }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 0 }, + "id": 2, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum(rate(http_requests_total{status=~\"5..\"}[5m]))", + "legendFormat": "5xx errors/s", + "refId": "A" + } + ], + "title": "Error Rate (5xx)", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "s", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 8 }, + "id": 3, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "histogram_quantile(0.95, sum(rate(http_request_duration_seconds_bucket[5m])) by (le, endpoint))", + "legendFormat": "p95 {{endpoint}}", + "refId": "A" + } + ], + "title": "Request Duration p95", + "type": "timeseries" + }, + { + "datasource": { "type": 
"prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 8 }, + "id": 4, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "http_requests_in_progress", + "legendFormat": "in progress", + "refId": "A" + } + ], + "title": "Active Requests", + "type": "timeseries" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 12, "x": 0, "y": 16 }, + "id": 5, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + { + "expr": "sum by (status) (rate(http_requests_total[5m]))", + "legendFormat": "{{status}}", + "refId": "A" + } + ], + "title": "Status Code Distribution", + "type": "piechart" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "short", "min": 0, "max": 1 }, + "overrides": [ + { "matcher": { "id": "byValue", "options": "0" }, "properties": [{ "id": "color", "value": { "fixedColor": "red", "mode": "fixed" } }] }, + { "matcher": { "id": "byValue", "options": "1" }, "properties": [{ "id": "color", "value": { "fixedColor": "green", "mode": "fixed" } }] } + ] + }, + "gridPos": { "h": 8, "w": 12, "x": 12, "y": 16 }, + "id": 6, + "options": { "colorMode": "value", "graphMode": "none", "justifyMode": "auto" }, + "targets": [ + { + "expr": "up{job=\"app\"}", + "legendFormat": "app", + "refId": "A" + } + ], + "title": "Uptime (app)", + "type": "stat" + }, + { + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "fieldConfig": { + "defaults": { "unit": "s", "min": 0 }, + "overrides": [] + }, + "gridPos": { "h": 8, "w": 24, "x": 0, "y": 24 }, + "id": 7, + "options": { "legend": { "displayMode": "list", "placement": "bottom" } }, + "targets": [ + 
{ + "expr": "rate(http_request_duration_seconds_bucket[5m])", + "legendFormat": "{{le}}", + "refId": "A" + } + ], + "title": "Request Duration Heatmap", + "type": "heatmap" + } + ], + "refresh": "10s", + "schemaVersion": 39, + "style": "dark", + "tags": ["devops", "app-metrics"], + "templating": { "list": [] }, + "time": { "from": "now-1h", "to": "now" }, + "timepicker": {}, + "timezone": "browser", + "title": "DevOps Info Service - Metrics", + "uid": "devops-app-metrics", + "version": 1, + "weekStart": "" +} diff --git a/lab8c/docs/grafana-dashboard.jpg b/lab8c/docs/grafana-dashboard.jpg new file mode 100644 index 0000000000..b30001e68e Binary files /dev/null and b/lab8c/docs/grafana-dashboard.jpg differ diff --git a/lab8c/docs/metrics-endpoint.jpg b/lab8c/docs/metrics-endpoint.jpg new file mode 100644 index 0000000000..0a5421dd13 Binary files /dev/null and b/lab8c/docs/metrics-endpoint.jpg differ diff --git a/lab8c/docs/prometheus-query.jpg b/lab8c/docs/prometheus-query.jpg new file mode 100644 index 0000000000..226cc594be Binary files /dev/null and b/lab8c/docs/prometheus-query.jpg differ diff --git a/lab8c/docs/prometheus-targets.jpg b/lab8c/docs/prometheus-targets.jpg new file mode 100644 index 0000000000..019cdc5bd1 Binary files /dev/null and b/lab8c/docs/prometheus-targets.jpg differ diff --git a/lab8c/loki/config.yml b/lab8c/loki/config.yml new file mode 100644 index 0000000000..a2d65e6e87 --- /dev/null +++ b/lab8c/loki/config.yml @@ -0,0 +1,42 @@ +auth_enabled: false + +server: + http_listen_port: 3100 + +common: + path_prefix: /loki + storage: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + replication_factor: 1 + ring: + instance_addr: 127.0.0.1 + kvstore: + store: inmemory + +schema_config: + configs: + - from: 2024-01-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/cache + 
cache_ttl: 24h + filesystem: + directory: /loki/chunks + +limits_config: + retention_period: 168h + +compactor: + working_directory: /loki/compactor + retention_enabled: true + delete_request_store: filesystem diff --git a/lab8c/prometheus/prometheus.yml b/lab8c/prometheus/prometheus.yml new file mode 100644 index 0000000000..80f4e88f91 --- /dev/null +++ b/lab8c/prometheus/prometheus.yml @@ -0,0 +1,23 @@ +global: + scrape_interval: 15s + evaluation_interval: 15s + +scrape_configs: + - job_name: "prometheus" + static_configs: + - targets: ["localhost:9090"] + + - job_name: "app" + static_configs: + - targets: ["app-python:5000"] + metrics_path: "/metrics" + + - job_name: "loki" + static_configs: + - targets: ["loki:3100"] + metrics_path: "/metrics" + + - job_name: "grafana" + static_configs: + - targets: ["grafana:3000"] + metrics_path: "/metrics" diff --git a/lab8c/promtail/config.yml b/lab8c/promtail/config.yml new file mode 100644 index 0000000000..395ff7b797 --- /dev/null +++ b/lab8c/promtail/config.yml @@ -0,0 +1,26 @@ +server: + http_listen_port: 9080 + grpc_listen_port: 0 + +positions: + filename: /tmp/positions.yaml + +clients: + - url: http://loki:3100/loki/api/v1/push + +scrape_configs: + - job_name: docker + docker_sd_configs: + - host: unix:///var/run/docker.sock + refresh_interval: 5s + + relabel_configs: + - source_labels: [__meta_docker_container_name] + target_label: container + regex: "/(.*)" + replacement: "$1" + + - source_labels: [__meta_docker_container_label_app] + target_label: app + - source_labels: [__meta_docker_container_label_logging] + target_label: logging diff --git a/lab9c/k8s/README.md b/lab9c/k8s/README.md new file mode 100644 index 0000000000..723ba24ece --- /dev/null +++ b/lab9c/k8s/README.md @@ -0,0 +1,168 @@ +# Lab 9 — Kubernetes Fundamentals + +This lab is complete for all **required** tasks (bonus not included). 
+
+## 1) Architecture Overview
+
+I used **kind** (Kubernetes in Docker) because it is quick to run locally on Windows and good for repeatable tests.
+
+- Deployment: `devops-info-python` (3 replicas)
+- Service: `devops-info-python-service` (`NodePort`, `80 -> 5000`, nodePort `30080`)
+- Update strategy: RollingUpdate (`maxSurge: 1`, `maxUnavailable: 0`)
+- Resources per pod:
+  - requests: `100m` CPU, `128Mi` memory
+  - limits: `300m` CPU, `256Mi` memory
+
+Traffic path used for local verification:
+`kubectl port-forward` -> a single backing Pod (port-forward tunnels directly to one pod selected behind the Service, so the Service's own load balancing is not exercised by this check).
+
+## 2) Manifest Files
+
+### `deployment.yml`
+
+Contains:
+
+- 3 replicas
+- labels/selectors
+- image `tsixphoenix/devops-info-python:lab9`
+- readiness + liveness probes on `/health`
+- resource requests/limits
+- non-root security context
+- rolling update strategy
+
+### `service.yml`
+
+Contains:
+
+- `type: NodePort`
+- selector `app: devops-info-python`
+- `port: 80`, `targetPort: 5000`, `nodePort: 30080`
+
+## 3) Deployment Evidence
+
+### Cluster setup
+
+```bash
+kubectl cluster-info --context kind-lab9
+kubectl get nodes -o wide
+```
+
+```text
+Kubernetes control plane is running at https://127.0.0.1:...
+lab9-control-plane Ready control-plane v1.32.2 +``` + +### Deployed resources + +```bash +kubectl get all +kubectl get pods,svc -o wide +kubectl describe deployment devops-info-python +``` + +Observed: + +- deployment `devops-info-python` is `3/3 READY` +- service `devops-info-python-service` is `NodePort 80:30080/TCP` +- probes and rolling strategy are visible in `describe` + +### App is reachable + +```bash +kubectl port-forward service/devops-info-python-service 8080:80 +curl http://127.0.0.1:8080/health +curl http://127.0.0.1:8080/ +``` + +Example health response: + +```text +{"status":"healthy","timestamp":"...","uptime_seconds":...} +``` + +## 4) Operations Performed + +### Deploy + +```bash +kubectl apply -f lab9c/k8s/deployment.yml -f lab9c/k8s/service.yml +kubectl rollout status deployment/devops-info-python +``` + +### Scale to 5 replicas + +```bash +kubectl scale deployment/devops-info-python --replicas=5 +kubectl rollout status deployment/devops-info-python +kubectl get deployment/devops-info-python +``` + +Result: `READY 5/5, AVAILABLE 5`. + +### Rolling update + +Updated `RELEASE_ID` in deployment and applied again: + +```bash +kubectl apply -f lab9c/k8s/deployment.yml +kubectl rollout status deployment/devops-info-python +kubectl rollout history deployment/devops-info-python +``` + +Result: rollout completed successfully, revision history updated. + +### Zero-downtime check during update + +I called `/health` repeatedly during rollout. All responses were HTTP 200. + +### Rollback + +```bash +kubectl rollout undo deployment/devops-info-python +kubectl rollout status deployment/devops-info-python +kubectl rollout history deployment/devops-info-python +``` + +Result: rollback completed and previous revision was restored. + +### Service verification + +```bash +kubectl describe service devops-info-python-service +kubectl get endpoints devops-info-python-service +``` + +Result: service endpoints matched running pod IPs on port 5000. 
+ +## 5) Production Considerations + +- Readiness probe keeps not-ready pods out of traffic. +- Liveness probe restarts broken pods. +- Requests/limits prevent noisy-neighbor issues and help scheduling. +- For real production, I would add: + - namespace isolation + network policies + - HPA + - ConfigMaps/Secrets + - PodDisruptionBudget + - Ingress with TLS +- Observability plan: + - metrics in Prometheus + - logs in Loki/Grafana + - alerts for 5xx rate, restarts, and pod availability + +## 6) Challenges & Solutions + +### No local cluster available initially + +- `kubectl` existed, but no running cluster. +- Fixed by creating a local `kind` cluster (`kind-lab9`). + +### First app rollout failed (CrashLoopBackOff) + +- Cause: old image/tag mismatch. +- Fix: built fresh image `tsixphoenix/devops-info-python:lab9`, loaded it into kind, and used that tag in deployment. + +### NodePort access from host in kind setup + +- Direct node IP access was unreliable in this environment. +- Used `kubectl port-forward` for stable local verification. 
diff --git a/lab9c/k8s/deployment.yml b/lab9c/k8s/deployment.yml
new file mode 100644
index 0000000000..48e23cab7a
--- /dev/null
+++ b/lab9c/k8s/deployment.yml
@@ -0,0 +1,67 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: devops-info-python
+  labels:
+    app: devops-info-python  # matched by the Service selector and spec.selector below
+    app.kubernetes.io/name: devops-info-python
+    app.kubernetes.io/component: backend
+spec:
+  replicas: 3
+  strategy:
+    type: RollingUpdate
+    rollingUpdate:
+      maxSurge: 1  # at most one extra pod is created during a rollout
+      maxUnavailable: 0  # never drop below the desired replica count (zero-downtime updates)
+  selector:
+    matchLabels:
+      app: devops-info-python
+  template:
+    metadata:
+      labels:
+        app: devops-info-python
+        app.kubernetes.io/name: devops-info-python
+        app.kubernetes.io/component: backend
+    spec:
+      containers:
+        - name: app
+          image: tsixphoenix/devops-info-python:lab9
+          imagePullPolicy: IfNotPresent  # use the image preloaded into the kind node instead of pulling
+          ports:
+            - name: http  # referenced by name in both probes below
+              containerPort: 5000
+          env:
+            - name: HOST
+              value: "0.0.0.0"  # quoted so YAML keeps it a string
+            - name: PORT
+              value: "5000"
+            - name: RELEASE_ID
+              value: "v2"  # bump this value to trigger a rolling update without changing the image
+          resources:
+            requests:
+              cpu: "100m"
+              memory: "128Mi"
+            limits:
+              cpu: "300m"
+              memory: "256Mi"
+          readinessProbe:  # keeps a pod out of Service endpoints until /health answers
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 5
+            periodSeconds: 5
+            timeoutSeconds: 2
+            failureThreshold: 3
+          livenessProbe:  # restarts the container after 3 consecutive failed /health checks
+            httpGet:
+              path: /health
+              port: http
+            initialDelaySeconds: 15  # later than readiness so a slow start is not killed
+            periodSeconds: 10
+            timeoutSeconds: 2
+            failureThreshold: 3
+          securityContext:  # container-level (allowPrivilegeEscalation / readOnlyRootFilesystem exist only here)
+            allowPrivilegeEscalation: false
+            readOnlyRootFilesystem: false  # NOTE(review): presumably the app writes to disk — confirm before tightening to true
+            runAsNonRoot: true
+            runAsUser: 10001  # arbitrary non-root UID
diff --git a/lab9c/k8s/service.yml b/lab9c/k8s/service.yml
new file mode 100644
index 0000000000..9a264cd259
--- /dev/null
+++ b/lab9c/k8s/service.yml
@@ -0,0 +1,17 @@
+apiVersion: v1
+kind: Service
+metadata:
+  name: devops-info-python-service
+  labels:
+    app: devops-info-python
+    app.kubernetes.io/name: devops-info-python
+spec:
+  type: NodePort  # exposes the app on every node; kind needs extraPortMappings or port-forward for host access
+  selector:
+    app: devops-info-python  # matches the Deployment's pod template labels
+  ports:
+    - name: http
+      protocol: TCP
+      port: 80  # Service (cluster) port
+      targetPort: 5000  # named containerPort "http" in the Deployment
+      nodePort: 30080  # must be within the default NodePort range 30000-32767