13 changes: 0 additions & 13 deletions .github/pull_request_template.md

This file was deleted.

15 changes: 0 additions & 15 deletions app_python/app.py
@@ -12,12 +12,9 @@
import uvicorn
from prometheus_client import Counter, Histogram, Gauge, generate_latest, REGISTRY

# Clear the registry at startup (to avoid duplicates)
# This fixes the "Duplicated timeseries" error
for collector in list(REGISTRY._collector_to_names):
REGISTRY.unregister(collector)

# Metrics with unique names
http_requests_counter = Counter(
'app_http_requests_total',
'Total HTTP requests',
@@ -76,17 +73,14 @@ def format(self, record):

logger = logging.getLogger(__name__)

# Use a lifespan context manager instead of on_event (fixes the deprecation warning)
@asynccontextmanager
async def lifespan(app: FastAPI):
# Startup
logger.info("Application started", extra={
'host': HOST,
'port': PORT,
'hostname': socket.gethostname()
})
yield
# Shutdown
logger.info("Application shutting down")

app = FastAPI(lifespan=lifespan)
@@ -103,10 +97,8 @@ async def log_requests(request: Request, call_next):
"""Логирование и метрики всех запросов"""
client_ip = request.client.host if request.client else "unknown"

# Increment the active-requests counter
active_requests_gauge.inc()

# Record the request start time
start_time = time.time()

logger.info("Request started", extra={
@@ -118,17 +110,14 @@ async def log_requests(request: Request, call_next):
try:
response = await call_next(request)

# Update metrics
duration = time.time() - start_time

# Count requests by method, endpoint, and status code
http_requests_counter.labels(
method=request.method,
endpoint=request.url.path,
status_code=str(response.status_code)
).inc()

# Record the request duration
http_request_duration.labels(
method=request.method,
endpoint=request.url.path
@@ -144,7 +133,6 @@ async def log_requests(request: Request, call_next):

return response
finally:
# Decrement the active-requests counter
active_requests_gauge.dec()


@@ -166,7 +154,6 @@ def get_uptime():
async def get_service_info(request: Request):
client_ip = request.client.host if request.client else '127.0.0.1'

# Time the system-information collection
start_time = time.time()

service_info = {
@@ -176,7 +163,6 @@ async def get_service_info(request: Request):
'uptime': get_uptime()['human']
}

# Record the collection duration
system_info_duration_histogram.observe(time.time() - start_time)

logger.info("Home page accessed", extra={
@@ -202,7 +188,6 @@ async def health_check(request: Request):
async def test_error(request: Request):
client_ip = request.client.host if request.client else 'unknown'

# Example external API call (to demonstrate the metric)
external_api_calls_counter.labels(api_name='test_api').inc()

logger.error("Test error", extra={
191 changes: 191 additions & 0 deletions k8s/README.md
@@ -0,0 +1,191 @@
# Lab 9 — Kubernetes Fundamentals

## Architecture Overview

### Deployment Architecture

My Kubernetes implementation consists of a three-tier architecture for a Python web application with the following components:

**Control Plane (Minikube):**
- Single node cluster running all control plane components
- API Server, Scheduler, Controller Manager, etcd

**Application Layer:**
- **Deployment**: `python-app` managing 5 pod replicas
- **Pods**: Each running a Python web server on port 5000
- **Labels**: Organized with `app=python-app`, `environment=production`

**Networking Layer:**
- **Service**: NodePort type (`python-app-service`)
- **Traffic Flow**: External → NodePort (30080) → Service (5000) → Pod (5000)
- **Load Balancing**: Service distributes traffic across all 5 replicas

### Resource Allocation Strategy

| Component | CPU Request | CPU Limit | Memory Request | Memory Limit |
|-----------|------------|-----------|----------------|--------------|
| Each Pod | 100m (0.1 core) | 200m (0.2 core) | 128Mi | 256Mi |
| **Total Cluster** | 500m | 1000m | 640Mi | 1280Mi |

**Rationale**: Requests ensure each pod gets minimum resources; limits prevent resource starvation. Conservative limits chosen for local development with room for scaling.
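The per-pod figures above map directly onto the `resources` block in `k8s/deployment.yml`:

```yaml
resources:
  requests:
    memory: "128Mi"
    cpu: "100m"
  limits:
    memory: "256Mi"
    cpu: "200m"
```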

## Manifest Files

### 1. `deployment.yml`
Manages application lifecycle with production best practices:

```yaml
replicas: 5
strategy: RollingUpdate
maxSurge: 1
maxUnavailable: 0
```

**Why 5 replicas?** Provides fault tolerance (up to two pods can fail while three healthy replicas keep serving) and handles moderate traffic spikes in development.

**Health Probes:**
- **Liveness**: `/health` endpoint checked every 10 seconds, starting after 30 seconds
- **Readiness**: Same endpoint checked every 5 seconds, starting after 5 seconds
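These timings correspond to the probe configuration in `k8s/deployment.yml`:

```yaml
livenessProbe:
  httpGet:
    path: /health
    port: 5000
  initialDelaySeconds: 30
  periodSeconds: 10
readinessProbe:
  httpGet:
    path: /health
    port: 5000
  initialDelaySeconds: 5
  periodSeconds: 5
```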

### 2. `service.yml`
Exposes application internally and externally:

```yaml
type: NodePort
selector: app=python-app
ports: 5000:30080
```

**Why NodePort?** Perfect for Minikube local development. Provides external access without cloud load balancers.

## Deployment Evidence

### Current Cluster State

![](screenshots/get-all.jpg)

### Deployment Details

![](screenshots/describe-deployment.jpg)

### Application Verification

![](screenshots/curl.jpg)

## Operations Performed

### 1. Initial Deployment
```bash
kubectl apply -f k8s/deployment.yml

kubectl apply -f k8s/service.yml
```

### 2. Scaling to 5 Replicas
```bash
kubectl scale deployment python-app --replicas=5

kubectl get pods -l app=python-app
```

### 3. Rolling Update Demonstration
```bash
kubectl set image deployment/python-app python-app=myapp:v2

kubectl rollout status deployment/python-app
```

### 4. Service Access Method
```bash
curl http://$(minikube ip):30080/health
```

## Production Considerations

### Health Checks Implementation

**Liveness Probe** (`/health` every 10s):
- Purpose: Detects if application is deadlocked or frozen
- Failure action: Kubernetes restarts the container
- Initial delay 30s gives app time to start before checking

**Readiness Probe** (`/health` every 5s):
- Purpose: Determines if pod can receive traffic
- Failure action: Removes pod from service endpoints
- Initial delay 5s ensures app starts accepting traffic quickly

Why separate probes? Liveness recovers hung or deadlocked containers by restarting them; readiness keeps traffic away from pods that are not ready to serve. Together they ensure both availability and reliability.
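The `failureThreshold: 3` setting means a single failed check does nothing; the probe's action only fires after three consecutive failures. A toy Python sketch of this bookkeeping (a model for illustration, not kubelet code; thresholds taken from this lab's manifest):

```python
# Toy model of probe bookkeeping: a probe triggers its action only
# after `failure_threshold` consecutive failures, and recovers after
# `success_threshold` consecutive successes.
class Probe:
    def __init__(self, failure_threshold=3, success_threshold=1):
        self.failure_threshold = failure_threshold
        self.success_threshold = success_threshold
        self.failures = 0
        self.successes = 0
        self.healthy = True

    def record(self, ok: bool) -> bool:
        """Feed one probe result; return the current health verdict."""
        if ok:
            self.successes += 1
            self.failures = 0
            if self.successes >= self.success_threshold:
                self.healthy = True
        else:
            self.failures += 1
            self.successes = 0
            if self.failures >= self.failure_threshold:
                self.healthy = False
        return self.healthy

liveness = Probe(failure_threshold=3)
liveness.record(False)
liveness.record(False)
print(liveness.healthy)   # True — two transient failures do not restart the container
liveness.record(False)
print(liveness.healthy)   # False — the third consecutive failure does
```

For a liveness probe "unhealthy" means a container restart; for a readiness probe it means removal from the service endpoints.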

### Resource Limits Rationale

**Requests (100m CPU / 128Mi memory):**
- Guarantees minimum resources for stable operation
- Ensures Kubernetes schedules pods on nodes with available capacity

**Limits (200m CPU / 256Mi memory):**
- Prevents any single pod from consuming all node resources
- Protects cluster from noisy neighbor problems
- Conservative limits chosen based on local testing showing app uses ~100Mi RAM

### Production Improvements

1. **High Availability:**
- Deploy to multiple availability zones
- Use Horizontal Pod Autoscaler (HPA) for dynamic scaling
- Implement Pod Disruption Budgets (PDB) for maintenance

2. **Security:**
- Use private image registry with image pull secrets
- Implement Network Policies for pod-to-pod communication
- Enable Pod Security Standards (PSS) for pod isolation

3. **Observability:**
- Deploy Prometheus for metrics collection
- Add Grafana for visualization
- Implement structured logging with Loki/ELK stack
- Add distributed tracing with Jaeger

4. **Configuration Management:**
- Use ConfigMaps for non-sensitive configuration
- Use Secrets for sensitive data (database passwords, API keys)
- Consider Helm charts for complex deployments
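As one concrete example of the HPA mentioned above, a minimal `autoscaling/v2` manifest targeting this deployment might look like the sketch below (the name, replica bounds, and 70% CPU target are illustrative assumptions, not part of this lab):

```yaml
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
metadata:
  name: python-app-hpa
spec:
  scaleTargetRef:
    apiVersion: apps/v1
    kind: Deployment
    name: python-app
  minReplicas: 3
  maxReplicas: 10
  metrics:
    - type: Resource
      resource:
        name: cpu
        target:
          type: Utilization
          averageUtilization: 70
```

Note that the HPA requires CPU requests to be set (as they are here), since utilization is computed relative to the request.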

### Monitoring Strategy

Current implementation includes:
- **Readiness/Liveness probes**: Basic health monitoring
- **Metrics endpoint**: `/metrics` exposed for Prometheus scraping
- **Resource monitoring**: Kubernetes native metrics via `kubectl top`

Production monitoring would add:
- **Prometheus**: Scrape metrics every 15s with alert rules
- **Grafana**: Dashboards for CPU, memory, request rates, error rates
- **AlertManager**: Alert on high error rates, pod restarts, resource pressure
- **Log aggregation**: Centralized logging with correlation IDs
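A minimal Prometheus scrape job for the `/metrics` endpoint could look like the following sketch (the job name and pod-discovery relabeling are assumptions for illustration):

```yaml
scrape_configs:
  - job_name: python-app
    scrape_interval: 15s
    metrics_path: /metrics
    kubernetes_sd_configs:
      - role: pod
    relabel_configs:
      - source_labels: [__meta_kubernetes_pod_label_app]
        regex: python-app
        action: keep
```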

## Challenges & Solutions

### CrashLoopBackOff Due to Port Mismatch

**Problem:** Pods kept crashing with logs showing:
```
Uvicorn running on http://0.0.0.0:5000
INFO: Shutting down
```

The application started successfully but immediately shut down because liveness probes were checking port 8000 instead of 5000.

**Debugging Process:**
1. Checked pod logs: `kubectl logs python-app-xxx`
2. Verified app was listening on port 5000
3. Checked deployment configuration: `kubectl describe pod python-app-xxx`
4. Discovered port mismatch between containerPort (8000) and actual app port (5000)

**Solution:**
Updated deployment.yml to use port 5000 in:
- `containerPort`
- `livenessProbe.port`
- `readinessProbe.port`
- Service targetPort

**Lesson Learned:** Always verify the actual port your application listens on and ensure consistency across container ports, probes, and service configurations.
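One way to guard against this class of mistake is a tiny pre-deploy sanity check. The sketch below is hypothetical and hardcodes the port values from this lab's manifests rather than parsing the YAML:

```python
# Hypothetical pre-deploy check: every setting that must agree with the
# app's listen port, collected in one place (values from this lab).
APP_PORT = 5000  # the port the application actually listens on

manifest_ports = {
    "containerPort": 5000,
    "livenessProbe.port": 5000,
    "readinessProbe.port": 5000,
    "service.targetPort": 5000,
}

def check_ports(app_port: int, ports: dict) -> list:
    """Return the names of any settings that disagree with the app port."""
    return [name for name, value in ports.items() if value != app_port]

mismatches = check_ports(APP_PORT, manifest_ports)
assert not mismatches, f"port mismatch in: {mismatches}"
print("all ports consistent")
```

Had such a check run with `containerPort: 8000`, it would have flagged the mismatch before the pods ever entered CrashLoopBackOff.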
76 changes: 76 additions & 0 deletions k8s/deployment.yml
@@ -0,0 +1,76 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: python-app
namespace: default
labels:
app: python-app
version: v1
environment: production
spec:
replicas: 3
strategy:
type: RollingUpdate
rollingUpdate:
maxSurge: 1
maxUnavailable: 0
selector:
matchLabels:
app: python-app
template:
metadata:
labels:
app: python-app
version: v1
spec:
containers:
- name: python-app
image: flowelx/fastapi-lab-app:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 5000
name: http
protocol: TCP

resources:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "256Mi"
cpu: "200m"

livenessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 30
periodSeconds: 10
timeoutSeconds: 5
failureThreshold: 3

readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 5
timeoutSeconds: 3
successThreshold: 1
failureThreshold: 3

env:
- name: APP_ENV
value: "production"
- name: LOG_LEVEL
value: "info"

securityContext:
runAsNonRoot: true
runAsUser: 1000
runAsGroup: 1000
allowPrivilegeEscalation: false
capabilities:
drop:
- ALL
readOnlyRootFilesystem: false
Binary file added k8s/screenshots/curl.jpg
Binary file added k8s/screenshots/describe-deployment.jpg
Binary file added k8s/screenshots/get-all.jpg
18 changes: 18 additions & 0 deletions k8s/service.yml
@@ -0,0 +1,18 @@
apiVersion: v1
kind: Service
metadata:
name: python-app-service
namespace: default
labels:
app: python-app
tier: backend
spec:
type: NodePort
selector:
app: python-app
ports:
- port: 5000
targetPort: 5000
nodePort: 30080
protocol: TCP
name: http