From 68e5e6b1a20821a65bb64baf1be99b06b471cb8a Mon Sep 17 00:00:00 2001
From: Lasse Benninga <devops.pipeline@example.com>
Date: Wed, 3 Jun 2026 14:01:58 +0200
Subject: [PATCH 1/2] feat: scaffold Week 6 assignment (Deploy to Azure)

Replaces the empty template stubs with a real scaffold derived from
`Data Track/Week 6/week_6__8_assignment.md`. Students implement a Container
App Job pipeline that uploads raw JSON to Azure Blob Storage and upserts
rows into Azure Database for PostgreSQL.

Layout
- `src/pipeline.py`: `raise NotImplementedError` stubs for the three
  pipeline functions (config, blob upload, Postgres upsert) plus a `run()`
  orchestrator.
- `Dockerfile`: cache-friendly starter with TODO comments for the layer
  order students must complete.
- `requirements.txt`: TODO entries for `azure-storage-blob` and
  `psycopg2-binary` pins.
- `AI_ASSIST.md`: section headers + TODO placeholders for Task 7.
- `.env.example`: connection-string template with placeholders only.
- `docs/`: target folder for the Task 5 Execution-history screenshot.

Autograder (`.hyf/test.sh`)
Static-analysis only. Azure deployment is unverifiable in CI (no
credentials), so the grader checks code shape, not live deployment:
required files, pinned deps, Dockerfile layer order, env-var reads,
`contextlib.closing()`, azure SDK logger silencing, `ON CONFLICT ... DO
UPDATE` upsert with `%s` placeholders, `sslmode=require`,
`BlobServiceClient` import, AI report fill-in, README `## Verification`
heading + embedded `docs/` image, screenshot presence.

Verified locally
- Bare scaffold: 13/100, pass=false.
- Working solution (clone at `~/Documents/github/hyf/data-assignment-week-6-test`):
  100/100, pass=true.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .devcontainer/devcontainer.json        |  15 ++
 .env.example                           |  11 +
 .gitignore                             | 161 ++------------
 .hyf/grader_lib.sh                     | 250 ++++++++++++++++++++++
 .hyf/test.sh                           | 285 ++++++++++++++++++++++++-
 AI_ASSIST.md                           |  25 +++
 Dockerfile                             |  21 ++
 README.md                              | 113 +++++++++-
 docs/.gitkeep                          |   2 +
 requirements.txt                       |  13 ++
 task-1/task 1 files => src/__init__.py |   0
 src/pipeline.py                        | 103 +++++++++
 task-2/task 2 files                    |   0
 13 files changed, 836 insertions(+), 163 deletions(-)
 create mode 100644 .devcontainer/devcontainer.json
 create mode 100644 .env.example
 create mode 100644 .hyf/grader_lib.sh
 mode change 100644 => 100755 .hyf/test.sh
 create mode 100644 AI_ASSIST.md
 create mode 100644 Dockerfile
 create mode 100644 docs/.gitkeep
 create mode 100644 requirements.txt
 rename task-1/task 1 files => src/__init__.py (100%)
 create mode 100644 src/pipeline.py
 delete mode 100644 task-2/task 2 files

diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..3184532
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,15 @@
+{
+  "name": "Week 6: Deploy to Azure",
+  "image": "mcr.microsoft.com/devcontainers/python:3.11",
+  "features": {
+    "ghcr.io/devcontainers/features/docker-in-docker:2": {},
+    "ghcr.io/devcontainers/features/azure-cli:1": {}
+  },
+  "postCreateCommand": "pip install -r requirements.txt",
+  "remoteUser": "vscode",
+  "customizations": {
+    "vscode": {
+      "extensions": ["ms-python.python", "ms-python.vscode-pylance"]
+    }
+  }
+}
diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..755ba01
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,11 @@
+# Copy to .env (which is gitignored) and fill in. Never commit real values.
+#
+# Your teacher will provide these connection strings, or you can pull them
+# from Azure Key Vault as described in Chapter 5 (Getting your connection
+# strings). Check Chapter 4 for the full Postgres connection-string format
+# Azure requires (host suffix and ssl flag).
+
+POSTGRES_URL=postgresql://<user>:<password>@<server>.postgres.database.azure.com:5432/<db>?<ssl-flag>
+AZURE_STORAGE_CONNECTION_STRING=DefaultEndpointsProtocol=https;AccountName=...;AccountKey=...;EndpointSuffix=core.windows.net
+SOURCE_NAME=weather
+LOG_LEVEL=INFO
diff --git a/.gitignore b/.gitignore
index 2b76d7c..f6a80c0 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,156 +3,31 @@
 Thumbs.db
 [Dd]esktop.ini
 
-# hyf
+# HYF auto-grader output
 .hyf/score.json
 
-# Editor and IDE settings
+# Editor / IDE
 .vscode/
 .idea/
-*.iml
 *.code-workspace
-*.sublime-project
-*.sublime-workspace
-.history/
-.ionide/
 
-# Logs
-logs
-*.log
-npm-debug.log*
-yarn-debug.log*
-yarn-error.log*
-lerna-debug.log*
-
-# Diagnostic reports (https://nodejs.org/api/report.html)
-report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
-
-# Runtime data
-pids
-*.pid
-*.seed
-*.pid.lock
-
-# Directory for instrumented libs generated by jscoverage/JSCover
-lib-cov
-
-# Coverage directory used by tools like istanbul
-coverage
-*.lcov
-
-# nyc test coverage
-.nyc_output
-
-# Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
-.grunt
-
-# Bower dependency directory (https://bower.io/)
-bower_components
-
-# node-waf configuration
-.lock-wscript
-
-# Compiled binary addons (https://nodejs.org/api/addons.html)
-build/Release
-
-# Dependency directories
-node_modules/
-jspm_packages/
-
-# Snowpack dependency directory (https://snowpack.dev/)
-web_modules/
-
-# TypeScript cache
-*.tsbuildinfo
-
-# Optional npm cache directory
-.npm
-
-# Optional eslint cache
-.eslintcache
-
-# Optional stylelint cache
-.stylelintcache
-
-# Optional REPL history
-.node_repl_history
-
-# Output of 'npm pack'
-*.tgz
-
-# Yarn Integrity file
-.yarn-integrity
-
-# dotenv environment variable files
+# Python
+__pycache__/
+*.pyc
+*.py[cod]
+*.pyo
+.Python
+.venv/
+venv/
+*.egg-info/
+dist/
+build/
+
+# Environments and secrets
 .env
 .env.*
 !.env.example
 
-# parcel-bundler cache (https://parceljs.org/)
-.cache
-.parcel-cache
-
-# Next.js build output
-.next
-out
-
-# Nuxt.js build / generate output
-.nuxt
-dist
-
-# Gatsby files
-.cache/
-# Comment in the public line in if your project uses Gatsby and not Next.js
-# https://nextjs.org/blog/next-9-1#public-directory-support
-# public
-
-# vuepress build output
-.vuepress/dist
-
-# vuepress v2.x temp and cache directory
-.temp
-.cache
-
-# Sveltekit cache directory
-.svelte-kit/
-
-# vitepress build output
-**/.vitepress/dist
-
-# vitepress cache directory
-**/.vitepress/cache
-
-# Docusaurus cache and generated files
-.docusaurus
-
-# Serverless directories
-.serverless/
-
-# FuseBox cache
-.fusebox/
-
-# DynamoDB Local files
-.dynamodb/
-
-# Firebase cache directory
-.firebase/
-
-# TernJS port file
-.tern-port
-
-# Stores VSCode versions used for testing VSCode extensions
-.vscode-test
-
-# yarn v3
-.pnp.*
-.yarn/*
-!.yarn/patches
-!.yarn/plugins
-!.yarn/releases
-!.yarn/sdks
-!.yarn/versions
-
-# Vite logs files
-vite.config.js.timestamp-*
-vite.config.ts.timestamp-*
-
+# Local runtime output (the cloud copies are the source of truth)
+output/
+*.log
diff --git a/.hyf/grader_lib.sh b/.hyf/grader_lib.sh
new file mode 100644
index 0000000..d383a4a
--- /dev/null
+++ b/.hyf/grader_lib.sh
@@ -0,0 +1,250 @@
+#!/usr/bin/env bash
+# grader_lib.sh — shared helpers for HYF Data Track autograders.
+# Source this at the top of test.sh:
+#   source "$(dirname "$0")/grader_lib.sh"
+#
+# Provides: pass(), fail(), warn(), print_results(), write_score(),
+# and a set of common static-analysis checks derived from recurring
+# PR review patterns across cohort c55.
+
+_grader_details=()  # result lines accumulated by pass/fail/warn, printed by print_results
+
+pass() { _grader_details+=("✓ PASS  $1"); }  # record a passed check
+fail() { _grader_details+=("✗ FAIL  $1"); }  # record a failed check
+warn() { _grader_details+=("⚠ WARN  $1"); }  # record a warning
+
+print_results() {
+  # Usage: print_results [<header>] — prints all recorded lines; safe when none exist.
+  local header="${1:-Autograder Results}" line
+  printf '\n=== %s ===\n' "$header"
+  for line in ${_grader_details[@]+"${_grader_details[@]}"}; do printf '  %s\n' "$line"; done
+  printf '\n'
+}
+
+write_score() {
+  # Usage: write_score <score> <passing> [<outfile>]
+  # Writes the result JSON (default: score.json next to this file) and echoes a summary.
+  local score="$1" passing="$2"
+  local outfile="${3:-$(dirname "${BASH_SOURCE[0]}")/score.json}"
+  local pass_flag="false"
+  if [[ "$score" -ge "$passing" ]]; then pass_flag="true"; fi
+  {
+    printf '{\n'
+    printf '  "score": %s,\n' "$score"
+    printf '  "pass": %s,\n' "$pass_flag"
+    printf '  "passingScore": %s\n' "$passing"
+    printf '}\n'
+  } > "$outfile"
+  echo "Score: $score / 100  (passing: $passing)  pass=$pass_flag"
+}
+
+# ── Common static-analysis checks ────────────────────────────────────────────
+# Each function returns 0 on pass and usually non-zero on fail/warn; under 'set -e', call as 'check_x ... || true'.
+# All feedback goes through pass()/fail()/warn() so it appears in print_results.
+
+check_no_print_statements() {
+  # Usage: check_no_print_statements <dir> [label]
+  # Warns (and returns 1) when a line in any *.py under <dir> starts with
+  # print( — ignoring leading whitespace — and carries no "# noqa" marker.
+  local dir="${1:-.}"
+  local label="${2:-$dir}"
+  local hits count
+  hits=$(grep -rn "^[[:space:]]*print(" "$dir" --include="*.py" 2>/dev/null | grep -v "# noqa" || true)
+  if [[ -z "$hits" ]]; then
+    return 0
+  fi
+  count=$(wc -l <<< "$hits" | tr -d ' ')
+  warn "$label: $count print() call(s) found — use logging.info/warning/error instead (see Week 1 Ch1)"
+  return 1
+}
+
+check_no_notimplemented() {
+  # Usage: check_no_notimplemented <dir> [label]
+  # Fails (and returns 1) when any *.py under <dir> still contains a
+  # "raise NotImplementedError" stub.
+  local dir="${1:-.}"
+  local label="${2:-$dir}"
+  local stubs
+  stubs=$(grep -rn "raise NotImplementedError" "$dir" --include="*.py" 2>/dev/null || true)
+  # Guard clause: no match means there is nothing to report.
+  [[ -n "$stubs" ]] || return 0
+  fail "$label: raise NotImplementedError still present — remove stubs before submitting"
+  return 1
+}
+
+check_no_relative_imports() {
+  # Usage: check_no_relative_imports <dir> [label]
+  # Fails (and returns 1) when a line in any *.py under <dir> starts with
+  # "from ." (a relative import). Running such a file directly, e.g. python3
+  # src/cleaner.py, dies with "attempted relative import with no known parent package".
+  local dir="${1:-.}"
+  local label="${2:-$dir}"
+  local rel_imports
+  rel_imports=$(grep -rn "^from \." "$dir" --include="*.py" 2>/dev/null || true)
+  if [[ -z "$rel_imports" ]]; then
+    return 0
+  fi
+  fail "$label: relative import found (from .module) — use absolute: 'from src.module import x'"
+  return 1
+}
+
+check_no_logging_in_utils() {
+  # Usage: check_no_logging_in_utils <utils_file>
+  # Warns (and returns 1) when <utils_file> (default task-1/src/utils.py) calls
+  # logging.basicConfig or logging.getLogger; a missing file counts as a pass.
+  local file="${1:-task-1/src/utils.py}"
+  [[ -f "$file" ]] || return 0
+  # No logging setup in the file: nothing to warn about.
+  grep -qE "logging\.basicConfig|logging\.getLogger" "$file" || return 0
+  warn "$file: logging.basicConfig/getLogger found — logging setup belongs in cleaner.py or the entry-point, not in utils"
+  return 1
+}
+
+check_gitignore_python() {
+  # Usage: check_gitignore_python [<gitignore_path>]
+  # Warns when __pycache__, *.pyc (or an equivalent *.py[cod] glob) or .env is absent.
+  # Returns 1 only when the file itself is missing; pattern warnings still return 0.
+  local gi="${1:-.gitignore}" ok=true
+  if [[ ! -f "$gi" ]]; then
+    warn ".gitignore is missing — add one so __pycache__/ and *.pyc are not committed"
+    return 1
+  fi
+  if ! grep -q "__pycache__" "$gi"; then
+    warn ".gitignore missing __pycache__/ — Python bytecode cache dirs should not be committed"
+    ok=false
+  fi
+  if ! grep -qE '\*\.py(c|\[[a-z]*c[a-z]*\])' "$gi"; then
+    warn ".gitignore missing *.pyc — compiled Python files should not be committed"
+    ok=false
+  fi
+  if ! grep -qE '^\.env($|[^[:alnum:]_])' "$gi"; then
+    warn ".gitignore missing .env — secret files should not be committed"
+    ok=false
+  fi
+  if [[ "$ok" = true ]]; then pass ".gitignore correctly excludes __pycache__/, *.pyc, and .env"; fi
+}
+
+check_screenshot_is_png() {
+  # Usage: check_screenshot_is_png <expected_path>
+  # Records pass for a non-empty .png, warn for a same-named non-empty
+  # .jpg/.jpeg, fail otherwise. Returns 0 / 1 / 2 respectively; this function
+  # only records feedback — it never changes a score itself.
+  local expected_png="$1"
+  local dir base ext  # ext is local so the loop variable does not leak to callers
+  dir="$(dirname "$expected_png")"
+  base="$(basename "$expected_png" .png)"
+
+  if [[ -s "$expected_png" ]]; then
+    pass "screenshot is $expected_png (.png format ✓)"
+    return 0
+  fi
+  for ext in jpg jpeg; do
+    if [[ -s "$dir/$base.$ext" ]]; then
+      warn "screenshot is .$ext but should be .png — rename to $base.png (partial credit still given)"
+      return 1
+    fi
+  done
+  fail "screenshot missing: $expected_png not found"
+  return 2
+}
+
+check_silent_zero_in_except() {
+  # Usage: check_silent_zero_in_except <file>
+  # Warns (returns 1) when an except block assigns a literal numeric 0, e.g.
+  # try: x = compute() / except: x = 0. Booleans (x = False) are not flagged.
+  local file="$1"
+  if [[ ! -f "$file" ]]; then return 0; fi
+  local found
+  found=$(python3 - "$file" 2>/dev/null << 'PY'
+import ast, sys
+try:
+    tree = ast.parse(open(sys.argv[1]).read())
+except SyntaxError:
+    sys.exit(0)
+for node in ast.walk(tree):
+    if isinstance(node, ast.ExceptHandler):
+        for stmt in node.body:
+            if isinstance(stmt, ast.Assign) and isinstance(stmt.value, ast.Constant):
+                if stmt.value.value == 0 and not isinstance(stmt.value.value, bool):  # False == 0 in Python
+                    print(f"line {stmt.lineno}: '{ast.unparse(stmt)}' — sets field to 0 in except block (silent data corruption)")
+PY
+) || true  # a python3 failure means "no findings", not a crash under set -e
+  if [[ -n "$found" ]]; then
+    warn "$file: silent 0-assignment in except block — skip the row or raise instead of setting to 0:"$'\n'"    $found"
+    return 1
+  fi
+  return 0
+}
+
+check_exception_logged() {
+  # Usage: check_exception_logged <dir>
+  # Warns when a logging.*/print call inside `except ... as <name>` never uses
+  # <name>; logging.exception() and exc_info= calls are exempt. Reports up to 3 hits.
+  local dir="${1:-.}"
+  local found
+  found=$(python3 - "$dir" 2>/dev/null << 'PY'
+import ast, os, sys
+issues = []
+for root, _, files in os.walk(sys.argv[1]):
+    for fname in files:
+        if not fname.endswith(".py"):
+            continue
+        path = os.path.join(root, fname)
+        try:
+            tree = ast.parse(open(path).read())
+        except (SyntaxError, ValueError, OSError):  # unparsable or unreadable: skip the file
+            continue
+        for node in ast.walk(tree):
+            if not isinstance(node, ast.ExceptHandler):
+                continue
+            exc_var = node.name  # e.g. "e" in `except ValueError as e`
+            if not exc_var:
+                continue
+            for stmt in node.body:
+                for call in ast.walk(stmt):
+                    if not isinstance(call, ast.Call):
+                        continue
+                    # Is it a logging.* or print call?
+                    func = call.func
+                    is_log = (isinstance(func, ast.Attribute) and isinstance(func.value, ast.Name)
+                              and func.value.id == "logging")
+                    is_print = isinstance(func, ast.Name) and func.id == "print"
+                    if not (is_log or is_print):
+                        continue
+                    # Compare AST names, not source text: "e" is a substring of "logging.error".
+                    implicit = (is_log and func.attr == "exception") or any(k.arg == "exc_info" for k in call.keywords)
+                    used = any(isinstance(n, ast.Name) and n.id == exc_var for n in ast.walk(call))
+                    if not (implicit or used):
+                        issues.append(f"{path}:{call.lineno}: log message doesn't include exception variable '{exc_var}' — add it for easier debugging")
+if issues:
+    for i in issues[:3]:  # cap at 3 to keep output readable
+        print(i)
+PY
+) || true  # a python3 failure means "no findings", not a crash under set -e
+  if [[ -n "$found" ]]; then
+    warn "exception variable not included in log message (harder to debug):"$'\n'"    $found"
+    return 1
+  fi
+  return 0
+}
+
+check_ruff() {
+  # Usage: check_ruff <dir> [<select>]
+  # Runs ruff on <dir> if available (binary or python module); warns on violations.
+  # Default select: F401 (unused imports), E302/E303 (blank-line rules).
+  local dir="${1:-.}" select="${2:-F401,E302,E303}"
+  local out count
+  local -a ruff_cmd
+  if command -v ruff &>/dev/null; then ruff_cmd=(ruff)
+  elif python3 -m ruff --version &>/dev/null; then ruff_cmd=(python3 -m ruff)
+  else return 0; fi  # ruff not installed — skip silently
+  # --quiet keeps summary text out of $out; concise prints one path:line:col entry per issue.
+  out=$("${ruff_cmd[@]}" check --quiet --select="$select" --output-format=concise "$dir" 2>/dev/null || true)
+  if [[ -n "$out" ]]; then
+    count=$(echo "$out" | grep -c "\.py:" || true)
+    warn "$dir: ruff found $count style issue(s) (unused imports / missing blank lines) — run 'ruff check $dir' to see details"
+    return 1
+  fi
+  pass "$dir: no ruff style issues (F401/E302/E303)"
+  return 0
+}
diff --git a/.hyf/test.sh b/.hyf/test.sh
old mode 100644
new mode 100755
index ee037fc..4492dcb
--- a/.hyf/test.sh
+++ b/.hyf/test.sh
@@ -1,13 +1,278 @@
 #!/usr/bin/env bash
+# Week 6 autograder: static analysis only. The pipeline targets Azure (Blob
+# Storage, managed Postgres, Container App Jobs) which we cannot reach from a
+# GitHub Actions runner without secrets. The grader therefore verifies code
+# shape — env-var reads, the closing() pattern, the upsert SQL, Dockerfile
+# layer order, the AI report, and a screenshot — instead of a live deployment.
+#
+# Total points: 100. Passing score: 60.
 set -euo pipefail
 
-# Run your test scripts here.
-# Auto grade tool will execute this file within the .hyf working directory.
-# The result should be stored in score.json file with the format shown below.
-cat << EOF > score.json
-{
-  "score": 0,
-  "pass": true,
-  "passingScore": 0
-}
-EOF
+SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
+REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"
+
+# shellcheck source=.hyf/grader_lib.sh
+source "$SCRIPT_DIR/grader_lib.sh"
+
+score=0
+readonly PASSING=60
+
+# Initialise score.json to 0/fail immediately so a mid-script crash leaves a
+# meaningful artefact behind instead of a stale score.
+cat > "$SCRIPT_DIR/score.json" <<INIT
+{"score": 0, "pass": false, "passingScore": $PASSING}
+INIT
+
+# ── Level 1 (10 pts): required files exist ──────────────────────────────────
+l1=0
+required_files=(
+  "Dockerfile"
+  "requirements.txt"
+  "src/pipeline.py"
+  "AI_ASSIST.md"
+  "README.md"
+)
+missing=0
+for f in "${required_files[@]}"; do
+  if [[ -f "$REPO_ROOT/$f" ]]; then
+    pass "found $f"
+  else
+    fail "missing $f"
+    missing=$((missing + 1))
+  fi
+done
+if [[ -d "$REPO_ROOT/docs" ]]; then
+  pass "found docs/ directory"
+else
+  fail "missing docs/ directory (Task 5 screenshot lives here)"
+  missing=$((missing + 1))
+fi
+if [[ "$missing" -eq 0 ]]; then
+  l1=10
+fi
+score=$((score + l1))  # ((score += l1)) returns 1 on a zero sum and would abort under set -e
+pass "Level 1: required files ($l1/10 pts)"
+
+# ── Level 2 (10 pts): pinned dependencies ───────────────────────────────────
+l2=0
+req="$REPO_ROOT/requirements.txt"
+if [[ -f "$req" ]]; then
+  # Pinned line for the blob SDK
+  if grep -qE "^azure-storage-blob==" "$req"; then
+    l2=$((l2 + 5)); pass "requirements.txt pins azure-storage-blob"
+  else
+    fail "requirements.txt does not pin azure-storage-blob (expected line like 'azure-storage-blob==12.x.y')"
+  fi
+  # Pinned line for the Postgres driver
+  if grep -qE "^psycopg2(-binary)?==" "$req"; then
+    l2=$((l2 + 5)); pass "requirements.txt pins psycopg2-binary"
+  else
+    fail "requirements.txt does not pin psycopg2-binary (expected line like 'psycopg2-binary==2.x.y')"
+  fi
+fi
+score=$((score + l2))  # ((score += l2)) returns 1 on a zero sum and would abort under set -e
+pass "Level 2: pinned dependencies ($l2/10 pts)"
+
+# ── Level 3 (10 pts): Dockerfile layer order ────────────────────────────────
+l3=0
+df="$REPO_ROOT/Dockerfile"
+if [[ -f "$df" ]]; then
+  if grep -qE "^FROM[[:space:]]+python:3\.11" "$df"; then
+    l3=$((l3 + 3)); pass "Dockerfile uses a python:3.11 base image"
+  else
+    fail "Dockerfile does not use a python:3.11 base image"
+  fi
+  req_line=$(grep -nE "^COPY[[:space:]].*requirements" "$df" | head -1 | cut -d: -f1 || echo 0)
+  src_line=$(grep -nE "^COPY[[:space:]].*src" "$df" | head -1 | cut -d: -f1 || echo 9999)
+  if [[ "$req_line" -gt 0 && "$src_line" -lt 9999 && "$req_line" -lt "$src_line" ]]; then
+    l3=$((l3 + 5)); pass "Dockerfile copies requirements before src/ (layer cache stays warm)"
+  else
+    fail "Dockerfile does not copy requirements.txt before src/ (cache-unfriendly)"
+  fi
+  if grep -qE "^CMD" "$df" && ! grep -qE 'CMD.*Task 4 still pending' "$df"; then
+    l3=$((l3 + 2)); pass "Dockerfile has a real CMD instruction"
+  else
+    fail "Dockerfile CMD is still the placeholder — replace it with the pipeline entry point"
+  fi
+fi
+score=$((score + l3))  # ((score += l3)) returns 1 on a zero sum and would abort under set -e
+pass "Level 3: Dockerfile ($l3/10 pts)"
+
+# ── Level 4 (15 pts): pipeline shape (env vars, closing, Azure logger) ──────
+l4=0
+py="$REPO_ROOT/src/pipeline.py"
+if [[ -f "$py" ]]; then
+  # 4a: reads both env vars (one lookup per variable; repeating one does not count twice)
+  env_re='os\.(environ\[|environ\.get\(|getenv\().*'
+  if grep -qE "${env_re}POSTGRES_URL" "$py" && grep -qE "${env_re}AZURE_STORAGE_CONNECTION_STRING" "$py"; then
+    l4=$((l4 + 5)); pass "pipeline.py reads POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING from env"
+  else
+    fail "pipeline.py does not read both POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING from os.environ"
+  fi
+  # 4b: closing() pattern from contextlib (Chapter 4 deliverable)
+  if grep -qE "from contextlib import closing" "$py" && grep -qE "with closing\(" "$py"; then
+    l4=$((l4 + 5)); pass "pipeline.py uses contextlib.closing() to wrap the Postgres connection"
+  else
+    fail "pipeline.py does not use 'from contextlib import closing' + 'with closing(...)' (Chapter 4 pattern)"
+  fi
+  # 4c: silences Azure SDK logger noise (Chapter 5 deliverable)
+  if grep -qE 'logging\.getLogger\(.azure.\)\.setLevel' "$py"; then
+    l4=$((l4 + 5)); pass "pipeline.py silences the azure SDK logger"
+  else
+    fail "pipeline.py does not silence the azure SDK logger (logging.getLogger(\"azure\").setLevel(...))"
+  fi
+  # Final stub guard
+  if grep -q "raise NotImplementedError" "$py"; then
+    warn "pipeline.py still contains 'raise NotImplementedError' — finish the stubs before submitting"
+  fi
+fi
+score=$((score + l4))  # ((score += l4)) returns 1 on a zero sum and would abort under set -e
+pass "Level 4: pipeline shape ($l4/15 pts)"
+
+# ── Level 5 (15 pts): idempotent upsert ─────────────────────────────────────
+l5=0
+if [[ -f "$py" ]]; then
+  # Both keywords must appear somewhere in the file (case-insensitive); they may
+  # sit on different lines, so multi-line SQL strings are accepted too.
+  if grep -qiE "ON CONFLICT" "$py" && grep -qiE "DO UPDATE" "$py"; then
+    l5=$((l5 + 10)); pass "pipeline.py uses an upsert (ON CONFLICT ... DO UPDATE)"
+  else
+    fail "pipeline.py does not use ON CONFLICT ... DO UPDATE (idempotent upsert)"
+  fi
+  # Reward parameterised SQL with %s placeholders (Postgres) — never f-strings.
+  # Use Python (not grep) because the placeholders typically live on a
+  # different line from the execute( call inside multi-line SQL strings.
+  if python3 - "$py" <<'PYCHECK' >/dev/null 2>&1
+import re, sys
+src = open(sys.argv[1]).read()
+# Find every cur.execute(...) / cursor.execute(...) call and check for %s
+# placeholders inside the call expression (parentheses, possibly multi-line).
+hits = re.findall(r"\bexecute\s*\(((?:[^()]|\([^()]*\))*)\)", src, re.DOTALL)
+ok = any("%s" in chunk for chunk in hits)
+sys.exit(0 if ok else 1)
+PYCHECK
+  then
+    l5=$((l5 + 5)); pass "pipeline.py uses %s placeholders in execute() (parameterised SQL)"
+  else
+    fail "pipeline.py does not use %s placeholders for parameterised SQL"
+  fi
+fi
+score=$((score + l5))  # ((score += l5)) returns 1 on a zero sum and would abort under set -e
+pass "Level 5: idempotent upsert ($l5/15 pts)"
+
+# ── Level 6 (10 pts): connection string + SDK use ───────────────────────────
+l6=0
+# sslmode=require somewhere visible: .env.example or pipeline default. The
+# point is to show the student knows Azure Postgres needs SSL. Virtualenvs are skipped.
+if grep -rqE "sslmode=require" "$REPO_ROOT" --include="*.py" --include=".env.example" --include="*.md" --exclude-dir=".git" --exclude-dir=".venv" --exclude-dir="venv" 2>/dev/null; then
+  l6=$((l6 + 5)); pass "connection string includes sslmode=require"
+else
+  fail "no mention of sslmode=require in the repo — Azure Postgres rejects connections without it"
+fi
+# Uses the Azure Blob SDK (not raw HTTP or az CLI shellouts)
+if [[ -f "$py" ]] && grep -qE "BlobServiceClient|from azure\.storage\.blob" "$py"; then
+  l6=$((l6 + 5)); pass "pipeline.py uses the azure-storage-blob SDK (BlobServiceClient)"
+else
+  fail "pipeline.py does not use the azure-storage-blob SDK (BlobServiceClient)"
+fi
+score=$((score + l6))  # ((score += l6)) returns 1 on a zero sum and would abort under set -e
+pass "Level 6: connection + SDK ($l6/10 pts)"
+
+# ── Level 7 (10 pts): AI_ASSIST.md filled in ────────────────────────────────
+l7=0
+ai="$REPO_ROOT/AI_ASSIST.md"
+if [[ -f "$ai" ]]; then
+  chars=$(wc -c < "$ai" | tr -d ' ')
+  has_prompt=$(grep -c "## The prompt" "$ai" || true)
+  has_code=$(grep -c "## The code" "$ai" || true)
+  has_changed=$(grep -c "## What I changed" "$ai" || true)
+  has_todo=$(grep -cE "^TODO:|^TODO " "$ai" || true)
+
+  sections_ok=false
+  filled_in=false
+  if [[ "$has_prompt" -ge 1 && "$has_code" -ge 1 && "$has_changed" -ge 1 ]]; then
+    sections_ok=true
+  fi
+  if [[ "$sections_ok" = true && "$chars" -ge 1800 && "$has_todo" -eq 0 ]]; then
+    filled_in=true
+  fi
+  # All-or-nothing: scaffolds that ship the section headers do not earn points
+  # without real content.
+  if [[ "$filled_in" = true ]]; then
+    l7=10
+    pass "AI_ASSIST.md has all three sections and is filled in (${chars} chars)"
+  else
+    if [[ "$sections_ok" = true ]]; then
+      fail "AI_ASSIST.md has section headers but is not filled in (${chars} chars, ${has_todo} TODO line(s); target 1800+ chars, 0 TODOs)"
+    else
+      fail "AI_ASSIST.md is missing one of the three required sections"
+    fi
+  fi
+fi
+score=$((score + l7))  # ((score += l7)) returns 1 on a zero sum and would abort under set -e
+pass "Level 7: AI report ($l7/10 pts)"
+
+# ── Level 8 (10 pts): README verification section + image link ──────────────
+l8=0
+rm="$REPO_ROOT/README.md"
+if [[ -f "$rm" ]]; then
+  heading_ok=false
+  image_ok=false
+  if grep -qE "^##[[:space:]]+Verification[[:space:]]*$" "$rm"; then
+    heading_ok=true
+  fi
+  if grep -qE '!\[[^]]*\]\(docs/[^)]+\.(png|jpg|jpeg|gif)\)' "$rm"; then
+    image_ok=true
+  fi
+  # All-or-nothing: the heading without an embedded image is just a template,
+  # so the scaffold cannot drift past 0/10 by accident.
+  if [[ "$heading_ok" = true && "$image_ok" = true ]]; then
+    l8=10
+    pass "README.md has '## Verification' heading and embeds a docs/ image"
+  else
+    if [[ "$heading_ok" = false ]]; then
+      fail "README.md is missing the '## Verification' heading (Task 5)"
+    fi
+    if [[ "$image_ok" = false ]]; then
+      fail "README.md does not embed a docs/ image with ![...](docs/...png)"
+    fi
+  fi
+fi
+score=$((score + l8))  # ((score += l8)) returns 1 on a zero sum and would abort under set -e
+pass "Level 8: README verification ($l8/10 pts)"
+
+# ── Level 9 (10 pts): Execution-history screenshot present ──────────────────
+l9=0
+shot=""
+for candidate in "$REPO_ROOT/docs/execution_history.png" \
+                 "$REPO_ROOT/docs/execution_history.jpg" \
+                 "$REPO_ROOT/docs/execution_history.jpeg"; do
+  if [[ -s "$candidate" ]]; then
+    shot="$candidate"
+    break
+  fi
+done
+if [[ -n "$shot" ]]; then
+  size=$(wc -c < "$shot" | tr -d ' ')
+  if [[ "$size" -gt 5000 ]]; then
+    if [[ "$shot" == *.png ]]; then
+      l9=$((l9 + 10)); pass "execution-history screenshot present at $(basename "$shot") (${size} bytes)"
+    else
+      l9=$((l9 + 5)); warn "execution-history screenshot present at $(basename "$shot") but should be .png (partial credit, ${size} bytes)"
+    fi
+  else
+    fail "execution-history screenshot at $(basename "$shot") looks too small to be a real screenshot (${size} bytes)"
+  fi
+else
+  fail "docs/execution_history.png not found (Task 5 deliverable)"
+fi
+score=$((score + l9))  # ((score += l9)) returns 1 on a zero sum and would abort under set -e
+pass "Level 9: execution screenshot ($l9/10 pts)"
+
+# ── Code hygiene warnings (no points; just feedback) ────────────────────────
+check_no_print_statements "$REPO_ROOT/src" "src/" || true  # warn-only: a non-zero return must not abort under set -e
+check_gitignore_python "$REPO_ROOT/.gitignore" || true  # likewise: returns 1 when .gitignore is missing
+
+# ── Final result ────────────────────────────────────────────────────────────
+print_results "Week 6 Autograder"
+write_score "$score" "$PASSING" "$SCRIPT_DIR/score.json"
diff --git a/AI_ASSIST.md b/AI_ASSIST.md
new file mode 100644
index 0000000..9b31b94
--- /dev/null
+++ b/AI_ASSIST.md
@@ -0,0 +1,25 @@
+# AI Assist Report
+
+> Task 7: Fill in all three sections below with specifics. "The AI fixed it"
+> is not enough detail. Show the exact prompt you used, paste the suggestion
+> you received, and explain what you changed and why.
+
+## The prompt I gave
+
+<!-- Paste the exact prompt you gave an LLM (ChatGPT, Claude, Copilot, etc.). -->
+
+TODO: paste your prompt here.
+
+## The code or suggestion it returned
+
+<!-- Paste the suggestion verbatim — code, shell commands, or both. -->
+
+```text
+TODO: paste the AI output here.
+```
+
+## What I changed after reviewing it
+
+<!-- Describe what you accepted, rejected, or modified, and why. -->
+
+TODO: describe your review here.
diff --git a/Dockerfile b/Dockerfile
new file mode 100644
index 0000000..ee3c978
--- /dev/null
+++ b/Dockerfile
@@ -0,0 +1,21 @@
+# Task 4: containerise the pipeline so Azure Container Apps Jobs can run it.
+#
+# Requirements (mirror Week 5):
+# 1. Base image: python:3.11-slim.
+# 2. Copy requirements.txt BEFORE copying src/ so the install layer stays cached.
+# 3. Install dependencies from requirements.txt.
+# 4. Copy src/ into the image.
+# 5. Default command runs the pipeline module.
+
+FROM python:3.11-slim
+
+WORKDIR /app
+
+# TODO Task 4: copy requirements.txt (must appear before any COPY src command)
+
+# TODO Task 4: install dependencies with pip
+
+# TODO Task 4: copy the src/ folder
+
+# TODO Task 4: set the CMD to run the pipeline (python -m src.pipeline)
+CMD ["python", "-c", "raise SystemExit('Dockerfile not finished: Task 4 still pending')"]
diff --git a/README.md b/README.md
index 96ce7bc..fa94600 100644
--- a/README.md
+++ b/README.md
@@ -1,17 +1,110 @@
-# [Track] week X assignment
-HackYourFuture <Track> week X assignment
-The Week X assignment for the HackYourFuture <TRACK> can be found at the following link: [TODO: Assignment url in the learning platform]
+# Week 6 Assignment: Deploy to Azure
 
+Take the containerised pipeline from Week 5 and ship it to Azure. The pipeline
+writes raw JSON to Azure Blob Storage and structured rows to Azure Database
+for PostgreSQL, then runs on a schedule from an Azure Container App Job.
 
-## Implementation Instructions
+The full assignment chapter (with all task instructions) lives in your
+HackYourFuture Notion curriculum under Week 6.
 
-Provide clear instructions on how trainees should implement the tasks.
+## Project structure
 
-### Task 1
-Instructions for Task 1
+```text
+data-assignment-week-6/
+├── .github/workflows/
+│   └── grade-assignment.yml    Triggers the auto-grader on every PR
+├── .devcontainer/
+│   └── devcontainer.json       Codespaces dev container (Python + Azure CLI)
+├── .hyf/
+│   ├── test.sh                 The auto-grader (run locally with `bash .hyf/test.sh`)
+│   └── grader_lib.sh           Shared helpers used by test.sh
+├── docs/
+│   └── execution_history.png   Task 5: portal screenshot you upload (PNG)
+├── src/
+│   └── pipeline.py             Task 3: blob upload + Postgres upsert
+├── Dockerfile                  Task 4: container image for the job
+├── requirements.txt            Task 2: pinned Python deps
+├── AI_ASSIST.md                Task 7: LLM prompt + your review
+└── README.md                   This file
+```
 
-### Task 2
-Instructions for Task 2
+## Where to start
 
-...
+| Step | File | Task in the chapter |
+|---|---|---|
+| 1 | `requirements.txt` | Pin `azure-storage-blob` and `psycopg2-binary` |
+| 2 | `src/pipeline.py` | Implement `get_config`, `fetch_records`, `upload_raw_to_blob`, `write_to_postgres` (Tasks 1-3) |
+| 3 | `Dockerfile` | Finish the cache-friendly image (Task 4) |
+| 4 | Azure CLI | Deploy as a Container App Job (Tasks 4-5) |
+| 5 | `docs/execution_history.png` | Add the Execution-history portal screenshot (Task 5) |
+| 6 | `AI_ASSIST.md` | Fill in your AI prompt + review (Task 7) |
 
+## Open in Codespaces
+
+> Codespaces ships Python 3.11 + the Azure CLI. Sign in with your HackYourFuture
+> Azure account targeting the HYF tenant:
+
+```bash
+az login --use-device-code --tenant 07a14c4e-d88c-42f7-83b3-13af7e57ff3d
+```
+
+## Run the pipeline locally
+
+```bash
+python -m venv .venv && source .venv/bin/activate
+pip install -r requirements.txt
+cp .env.example .env  # fill in real connection strings, never commit them
+set -a && source .env && set +a
+python -m src.pipeline
+```
+
+## Verifying your deployment (Task 5)
+
+After deploying the Container App Job and triggering a run, capture proof:
+
+1. Open the Azure portal, find your Container App Job, open the **Execution
+   history** blade.
+2. Screenshot the most recent successful run.
+3. Save the screenshot as `docs/execution_history.png` (exact name required).
+4. Replace this whole section with one called `## Verification` and embed
+   your screenshot using a Markdown image link. The grader looks for the
+   `## Verification` heading and an image reference such as
+   `![Execution history](docs/execution_history.png)`.
+
+## Check your score locally
+
+```bash
+bash .hyf/test.sh
+cat .hyf/score.json
+```
+
+The grader reports a score out of 100. The passing threshold is 60.
+
+## Scoring ladder
+
+| Points | What the grader checks |
+|---|---|
+| 10 | Required files exist (Dockerfile, requirements.txt, src/pipeline.py, AI_ASSIST.md, docs/) |
+| 10 | requirements.txt pins `azure-storage-blob` and `psycopg2-binary` |
+| 10 | Dockerfile copies requirements before src (cache-friendly layer order) |
+| 15 | Pipeline reads both env vars, wraps the Postgres connection so it is closed cleanly, and silences the Azure SDK logger |
+| 15 | Pipeline uses an idempotent upsert (`ON CONFLICT ... DO UPDATE`) |
+| 10 | Connection string uses the Azure-required SSL flag and the blob SDK client class |
+| 10 | AI_ASSIST.md has all three sections and is filled in (>=1800 chars, no `TODO:`) |
+| 10 | README has a `## Verification` heading and references an image in `docs/` |
+| 10 | `docs/execution_history.png` exists and is non-trivial (real screenshot) |
+
+## Submitting
+
+1. Create a branch: `git switch -c week6/your-name`.
+2. Commit your work.
+3. Push and open a pull request against `main`.
+4. Share the PR URL with your teacher.
+
+## Instructor / maintainer notes
+
+This repository is built from the canonical Week 6 chapter in the curriculum
+repo (`Data Track/Week 6/week_6__8_assignment.md`). The auto-grader checks
+code shape, not live Azure deployment, because the GitHub Actions runner has
+no Azure credentials. To rebuild from a fresh scaffold, follow
+`.agents/workflows/build_assignment_repo.md` in the curriculum repo.
diff --git a/docs/.gitkeep b/docs/.gitkeep
new file mode 100644
index 0000000..5c26b83
--- /dev/null
+++ b/docs/.gitkeep
@@ -0,0 +1,2 @@
+# Keep the docs/ folder in git so students have a place to drop the Task 5
+# Execution-history screenshot (`docs/execution_history.png`).
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..b5d18d7
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,13 @@
+# Task 2 + Task 3: pin every dependency the pipeline uses.
+#
+# Format: package==version. Look up the latest stable version with:
+#   pip index versions <package>
+#
+# The pipeline needs the Azure Blob SDK and a Postgres driver. Add them below
+# with explicit pins.
+
+# TODO: pin azure-storage-blob (uncomment and add a version)
+# azure-storage-blob==
+
+# TODO: pin psycopg2-binary (uncomment and add a version)
+# psycopg2-binary==
diff --git a/task-1/task 1 files b/src/__init__.py
similarity index 100%
rename from task-1/task 1 files
rename to src/__init__.py
diff --git a/src/pipeline.py b/src/pipeline.py
new file mode 100644
index 0000000..ef1fc8a
--- /dev/null
+++ b/src/pipeline.py
@@ -0,0 +1,103 @@
+"""Week 6 assignment: Azure-deployed data pipeline.
+
+This pipeline replaces the local file output from Week 5 with two cloud
+targets: raw JSON in Azure Blob Storage and structured rows in Azure
+Database for PostgreSQL. When you finish the assignment it will run as a
+Container App Job triggered from the Azure Portal or the CLI.
+
+Replace every `raise NotImplementedError` below with a real implementation.
+
+Reference chapters:
+- Blob upload:      Data Track/Week 6/week_6__3_azure_blob_storage.md
+- Postgres connect: Data Track/Week 6/week_6__4_azure_postgresql.md
+- Container Job:    Data Track/Week 6/week_6__5_container_apps_jobs.md
+"""
+
+import logging
+import os
+from datetime import date
+
+logging.basicConfig(level=logging.INFO, format="%(levelname)s %(message)s")
+logger = logging.getLogger(__name__)
+
+# TASK 3 hint: quiet the Azure SDK so its DEBUG output does not drown your own
+# pipeline logs. The right call lives in Chapter 5 (Viewing logs).
+
+
+def get_config() -> dict:
+    """Return env-var configuration as a dict keyed by lowercased variable name.
+
+    Required (run() reads "postgres_url", "azure_storage_connection_string"):
+        - POSTGRES_URL: full Azure Postgres connection string.
+        - AZURE_STORAGE_CONNECTION_STRING: blob storage account connection string.
+
+    Optional:
+        - SOURCE_NAME: logical source label ("source_name"), default "weather".
+        - LOG_LEVEL: not parsed here; the orchestrator sets it via env var.
+
+    Raise RuntimeError with a clear message if a required variable is missing.
+    """
+    raise NotImplementedError(
+        "Task 3: read POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING from os.environ"
+    )
+
+
+def fetch_records() -> list[dict]:
+    """Return a small batch of records to ingest.
+
+    In a real pipeline you would call an API here. Return a list of at least
+    one dict with a stable key set (for example: station, timestamp,
+    temperature_c, humidity_pct).
+    """
+    raise NotImplementedError("Task 3: return a list of at least one record")
+
+
+def upload_raw_to_blob(records: list[dict], blob_conn_str: str, source: str) -> str:
+    """Upload the raw records as a single JSON blob and return its name.
+
+    The blob name must follow the assignment convention:
+        raw/<source>/<YYYY-MM-DD>.json
+
+    Use the azure-storage-blob SDK. The target container is "raw" (your
+    teacher has pre-created it). Overwrite if the blob already exists so
+    same-day reruns succeed.
+    """
+    raise NotImplementedError("Task 1 + Task 3: upload records to blob storage")
+
+
+def write_to_postgres(records: list[dict], postgres_url: str) -> int:
+    """Insert (or upsert) records into Azure Postgres. Return the row count.
+
+    Steps:
+        1. Open a psycopg2 connection wrapped so it is closed cleanly when the
+           function returns, even on error.
+        2. Ensure the target table exists (create it if missing).
+        3. Insert each record. The pipeline must be safe to rerun on the same
+           day: a re-run must update rather than fail.
+        4. Commit and return the number of rows written.
+
+    See Chapter 4 for the connection-and-cursor pattern this is based on.
+    """
+    raise NotImplementedError("Task 2 + Task 3: insert rows into Azure Postgres")
+
+
+def run() -> None:
+    config = get_config()
+    logger.info("starting pipeline")
+    records = fetch_records()
+
+    blob_name = upload_raw_to_blob(
+        records,
+        config["azure_storage_connection_string"],
+        config["source_name"],
+    )
+    logger.info("uploaded blob %s", blob_name)
+
+    row_count = write_to_postgres(records, config["postgres_url"])
+    logger.info("wrote %d rows to postgres", row_count)
+
+    logger.info("pipeline complete (today=%s)", date.today().isoformat())
+
+
+if __name__ == "__main__":
+    run()
diff --git a/task-2/task 2 files b/task-2/task 2 files
deleted file mode 100644
index e69de29..0000000

From a5b7e639f9ccb2fdbf4331f8a0ee4f6fcc834e66 Mon Sep 17 00:00:00 2001
From: Lasse Benninga <devops.pipeline@example.com>
Date: Wed, 3 Jun 2026 17:24:42 +0200
Subject: [PATCH 2/2] fix(grader): tighten psycopg2 pin + env-var checks (PR #1
 review)

- Level 2 psycopg2 pin check no longer accepts bare `psycopg2==...`.
  The python:3.11-slim base image lacks libpq-dev and gcc, so the
  source dist fails to build; accepting it gave students credit for
  a requirements.txt that immediately broke `docker build`.
- Level 4a env-var check no longer counts matching lines. Two
  independent `grep -q` calls accept any valid read pattern
  (separate lines, one-line tuple read, dict comprehension), so the
  grader stops false-failing correct submissions.
---
 .hyf/test.sh | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/.hyf/test.sh b/.hyf/test.sh
index 4492dcb..6b4aed4 100755
--- a/.hyf/test.sh
+++ b/.hyf/test.sh
@@ -63,8 +63,11 @@ if [[ -f "$req" ]]; then
   else
     fail "requirements.txt does not pin azure-storage-blob (expected line like 'azure-storage-blob==12.x.y')"
   fi
-  # Pinned line for the Postgres driver
-  if grep -qE "^psycopg2(-binary)?==" "$req"; then
+  # Pinned line for the Postgres driver. Require -binary explicitly: the
+  # source psycopg2 needs libpq-dev + gcc, which python:3.11-slim does not
+  # ship, so accepting bare psycopg2 here would give a student credit for
+  # a requirements.txt that breaks `docker build`.
+  if grep -qE "^psycopg2-binary==" "$req"; then
     ((l2 += 5)); pass "requirements.txt pins psycopg2-binary"
   else
     fail "requirements.txt does not pin psycopg2-binary (expected line like 'psycopg2-binary==2.x.y')"
@@ -102,9 +105,10 @@ pass "Level 3: Dockerfile ($l3/10 pts)"
 l4=0
 py="$REPO_ROOT/src/pipeline.py"
 if [[ -f "$py" ]]; then
-  # 4a: reads both env vars
-  env_count=$(grep -cE 'os\.environ\[.*(POSTGRES_URL|AZURE_STORAGE_CONNECTION_STRING).*\]|os\.environ\.get\(.*(POSTGRES_URL|AZURE_STORAGE_CONNECTION_STRING).*\)|os\.getenv\(.*(POSTGRES_URL|AZURE_STORAGE_CONNECTION_STRING).*\)' "$py" || true)
-  if [[ "$env_count" -ge 2 ]]; then
+  # 4a: reads both env vars. Check each var independently (no line counting)
+  # but require an os.environ/os.getenv accessor before the name: the bare
+  # scaffold's docstring names both vars, so a name-only grep always passes.
+  if grep -qE 'os\.(environ|getenv).*POSTGRES_URL' "$py" && grep -qE 'os\.(environ|getenv).*AZURE_STORAGE_CONNECTION_STRING' "$py"; then
     ((l4 += 5)); pass "pipeline.py reads POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING from env"
   else
     fail "pipeline.py does not read both POSTGRES_URL and AZURE_STORAGE_CONNECTION_STRING from os.environ"