zhouning · zhouning · Mar 15, 2026 · Mar 15, 2026 · Mar 16, 2026 · Mar 18, 2026
diff --git a/.chainlit/config.toml b/.chainlit/config.toml
@@ -3,6 +3,7 @@
 name = "Data Agent"
 user_env = []
 duration = 0
+watch_ignore = ["data_agent/uploads/**", "data_agent/downloads/**", "*.log", "*.db"]  # presumably globs the dev file watcher should skip (runtime uploads/downloads, logs, DB files) — NOTE(review): confirm the installed Chainlit version reads this key
 
 [features]
 prompt_playground = true
@@ -25,7 +26,37 @@ generated_by = "0.7.700"
 
 [features.spontaneous_file_upload]
 enabled = true
-accept = ["text/csv", "application/zip", ".shp", ".shx", ".dbf"]
+accept = [
+    # MIME types
+    "text/csv",
+    "application/zip",
+    "application/x-zip-compressed",
+    "application/octet-stream",
+    # Archives and shapefile components
+    ".zip",
+    ".shp",
+    ".shx",
+    ".dbf",
+    ".prj",
+    ".cpg",
+    # Other vector formats
+    ".geojson",
+    ".gpkg",
+    ".kml",
+    ".kmz",
+    # Rasters
+    ".tif",
+    ".tiff",
+    # Spreadsheets and generic JSON
+    ".xlsx",
+    ".xls",
+    ".json",
+    # CAD drawings and 3D meshes
+    ".dwg",
+    ".dxf",
+    ".obj",
+    ".stl",
+]
 max_size_mb = 500
 
 # Starters

diff --git a/.files/3a4780d0-6111-4ab3-a150-12264948919e/de1cadc5-c814-439c-af57-67dd3c11a7ee.pyz b/.files/3a4780d0-6111-4ab3-a150-12264948919e/de1cadc5-c814-439c-af57-67dd3c11a7ee.pyz
diff --git a/.github/workflows/cd-production.yml b/.github/workflows/cd-production.yml
@@ -0,0 +1,150 @@
+# =============================================================================
+# CD Pipeline — Production Deployment (Gated)
+#
+# Phase 3 of 3-phase CI/CD (per Google AgentOps whitepaper)
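+# Requires:
+#   1. Staging validation passed (cd-staging.yml) — by convention only; this workflow does not verify it
+#   2. Manual approval from Product Owner (protection rules on the "production" GitHub Environment)
+#
+# Canary rollout is controlled by the canary_weight dispatch input (the rollout steps are placeholders that only echo).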
+# =============================================================================
+name: CD - Production
+
+on:
+  workflow_dispatch:  # manual trigger only; no push or schedule trigger is defined
+    inputs:
+      canary_weight:
+        default: "10"
+        required: false
+        description: "Canary traffic weight (0-100, 0=skip canary)"
+      skip_eval:
+        default: "false"  # the pre-deploy-eval job compares this against the string 'true'
+        required: false
+        description: "Skip pre-deploy evaluation"
+
+jobs:
+  # ---------------------------------------------------------------------------
+  # Gate 1: Final evaluation before production
+  # ---------------------------------------------------------------------------
+  pre-deploy-eval:
+    name: Pre-Deploy Evaluation
+    if: ${{ github.event.inputs.skip_eval != 'true' }}  # operator opt-out via the skip_eval dispatch input
+    runs-on: ubuntu-latest
+    services:
+      postgres:  # throwaway database for this job; the credentials below never leave the runner
+        image: postgis/postgis:16-3.4
+        env:
+          POSTGRES_DB: gis_agent
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: prod_eval_password
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+      - run: pip install -r requirements.txt
+      - name: Run evaluation suite
+        env:
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          POSTGRES_HOST: localhost
+          POSTGRES_PORT: 5432
+          POSTGRES_DATABASE: gis_agent
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: prod_eval_password
+        run: python data_agent/run_evaluation.py --num-runs 3
+      - name: Check evaluation verdict
+        run: |
+          # Fail closed: a missing or unreadable summary must block the release instead of silently passing the gate.
+          SUMMARY=eval_results/eval_summary.json  # NOTE(review): confirm run_evaluation.py writes here relative to the repo root
+          VERDICT=$(python -c "import json, sys; print(json.load(open(sys.argv[1])).get('overall_pass', False))" "$SUMMARY") || VERDICT=unavailable
+          if [ "$VERDICT" != "True" ]; then
+            echo "❌ Evaluation FAILED (verdict: ${VERDICT}). Blocking production deployment."
+            exit 1
+          fi
+          echo "✅ Evaluation PASSED."
+
+  # ---------------------------------------------------------------------------
+  # Gate 2: Manual approval (Product Owner sign-off)
+  # ---------------------------------------------------------------------------
+  approval:
+    name: Production Approval
+    runs-on: ubuntu-latest
+    needs: pre-deploy-eval
+    environment: production  # sign-off relies on this GitHub Environment being configured with required reviewers
+    if: always() && (needs.pre-deploy-eval.result == 'skipped' || needs.pre-deploy-eval.result == 'success')
+    steps:
+      - name: Approved for production
+        run: echo "✅ Production deployment approved."
+
+  # ---------------------------------------------------------------------------
+  # Deploy: Canary → Full rollout
+  # ---------------------------------------------------------------------------
+  deploy:
+    name: Deploy to Production
+    needs: approval
+    if: ${{ !cancelled() && needs.approval.result == 'success' }}  # explicit check: a skipped pre-deploy-eval would otherwise skip this job too
+    runs-on: ubuntu-latest
+    env: { CANARY_WEIGHT: "${{ github.event.inputs.canary_weight }}" }  # scripts read "$CANARY_WEIGHT"; the input is never spliced into shell text
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Build Docker image
+        run: |
+          docker build -t gis-data-agent:${{ github.sha }} .
+          docker tag gis-data-agent:${{ github.sha }} gis-data-agent:latest
+          echo "Built image: gis-data-agent:${{ github.sha }}"
+
+      - name: Canary deployment
+        if: ${{ github.event.inputs.canary_weight != '0' }}
+        run: |
+          echo "🐤 Canary deployment with ${CANARY_WEIGHT}% traffic"
+          echo "In production, this would:"
+          echo "  1. Deploy new version alongside current"
+          echo "  2. Route ${CANARY_WEIGHT}% traffic to new version"
+          echo "  3. Monitor error rates and latency for 15 minutes"
+          echo "  4. If healthy, proceed to full rollout"
+          echo "  5. If unhealthy, automatic rollback"
+          printf '\n%s\n' "Implementation options:"
+          echo "  - Cloud Run: gcloud run services update-traffic --to-revisions=NEW=${CANARY_WEIGHT}"
+          echo "  - K8s: kubectl apply -f k8s/canary-ingress.yaml"
+          echo "  - Docker: CANARY_WEIGHT=${CANARY_WEIGHT} docker compose -f docker-compose.prod.yml up -d"
+
+      - name: Full rollout
+        run: |
+          echo "🚀 Full production rollout"
+          echo "Image: gis-data-agent:${{ github.sha }}"
+          echo "Commit: ${{ github.sha }}"
+          printf '\n%s\n' "docker compose -f docker-compose.yml -f docker-compose.prod.yml up -d"
+
+      - name: Record deployment
+        run: |
+          echo "📝 Deployment record:"
+          echo "  Version: ${{ github.sha }}"
+          echo "  Time: $(date -u +%Y-%m-%dT%H:%M:%SZ)"
+          echo "  Deployer: ${{ github.actor }}"
+          echo "  Canary: ${CANARY_WEIGHT}%"
+
+  # ---------------------------------------------------------------------------
+  # Post-deploy smoke test
+  # ---------------------------------------------------------------------------
+  smoke-test:
+    name: Post-Deploy Smoke Test
+    needs: deploy
+    if: ${{ !cancelled() && needs.deploy.result == 'success' }}  # explicit check: a skipped job earlier in the needs chain would otherwise skip this one
+    runs-on: ubuntu-latest
+    steps:
+      - name: Health check
+        run: |
+          echo "🔍 Running smoke tests against production..."
+          echo "  - Health endpoint: GET /health/live"
+          echo "  - Readiness: GET /health/ready"
+          echo "  - API response: GET /api/capabilities"
+          printf '\n%s\n' "In production, these would be real HTTP checks."
+          echo "If any fail, trigger automatic rollback."
diff --git a/.github/workflows/cd-staging.yml b/.github/workflows/cd-staging.yml
@@ -0,0 +1,161 @@
+# =============================================================================
+# CD Pipeline — Staging Deployment + Evaluation
+#
+# Triggered: On every push to main, or manually (workflow_dispatch); it does not wait for a CI run to pass
+# Phase 2 of 3-phase CI/CD (per Google AgentOps whitepaper)
+# =============================================================================
+name: CD - Staging
+
+on:
+  workflow_dispatch:  # manual runs
+  push:
+    branches: ["main"]  # every push to main
+
+jobs:
+  deploy-staging:
+    runs-on: ubuntu-latest
+    name: Deploy to Staging
+    services:
+      postgres:  # PostGIS database the test suite connects to on localhost:5432
+        image: postgis/postgis:16-3.4
+        ports:
+          - "5432:5432"
+        env:
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: staging_password
+          POSTGRES_DB: gis_agent_staging
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+
+      - name: Set up Python 3.13
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.13"
+
+      - name: Install dependencies
+        run: |
+          pip install -r requirements.txt
+          pip install pytest
+
+      - name: Run full test suite (staging validation)
+        run: |
+          python -m pytest data_agent/ \
+            --ignore=data_agent/test_knowledge_agent.py \
+            -q --tb=short --junitxml=staging-test-results.xml
+        env:
+          DEPLOY_ENV: staging
+          POSTGRES_HOST: localhost
+          POSTGRES_PORT: 5432
+          POSTGRES_DATABASE: gis_agent_staging
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: staging_password
+
+      - name: Upload staging test results
+        if: always()  # keep the JUnit report even when the tests fail
+        uses: actions/upload-artifact@v4
+        with:
+          name: staging-test-results
+          path: staging-test-results.xml
+
+  evaluate-staging:
+    name: Agent Evaluation (Staging)
+    needs: deploy-staging
+    runs-on: ubuntu-latest
+    env:
+      HAS_LLM_CREDS: ${{ secrets.GOOGLE_API_KEY != '' || vars.GOOGLE_API_KEY != '' }}  # secrets.* is not available in a job-level `if`, so every step checks this flag instead
+      POSTGRES_HOST: localhost
+      POSTGRES_PORT: 5432
+      POSTGRES_DATABASE: gis_agent_staging
+      POSTGRES_USER: postgres
+      POSTGRES_PASSWORD: staging_password
+    services:
+      postgres:
+        image: postgis/postgis:16-3.4
+        env:
+          POSTGRES_DB: gis_agent_staging
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: staging_password
+        ports:
+          - 5432:5432
+        options: >-
+          --health-cmd "pg_isready -U postgres"
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+
+    steps:
+      - uses: actions/checkout@v4
+        if: env.HAS_LLM_CREDS == 'true'
+
+      - name: Set up Python 3.13
+        if: env.HAS_LLM_CREDS == 'true'
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.13'
+
+      - name: Install dependencies
+        if: env.HAS_LLM_CREDS == 'true'
+        run: pip install -r requirements.txt
+
+      - name: Run agent evaluation
+        if: env.HAS_LLM_CREDS == 'true'
+        env:
+          GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }}
+          GOOGLE_GENAI_USE_VERTEXAI: ${{ vars.GOOGLE_GENAI_USE_VERTEXAI }}
+          GOOGLE_CLOUD_PROJECT: ${{ vars.GOOGLE_CLOUD_PROJECT }}
+          GOOGLE_CLOUD_LOCATION: ${{ vars.GOOGLE_CLOUD_LOCATION || 'global' }}
+          DEPLOY_ENV: staging
+        run: |
+          set -o pipefail  # otherwise the step reports tee's exit status and hides evaluation failures
+          python data_agent/run_evaluation.py --num-runs 2 2>&1 | tee eval-output.log
+          echo "Evaluation complete"
+
+      - name: Record eval results to history
+        if: always() && env.HAS_LLM_CREDS == 'true'  # still runs after a failed evaluation, but not when the evaluation was never attempted
+        run: |
+          python -c "
+          from data_agent.eval_history import ensure_eval_table, record_eval_result
+          import json, os
+          ensure_eval_table()
+          summary_path = 'eval_results/eval_summary.json'
+          if os.path.exists(summary_path):
+              with open(summary_path) as f:
+                  s = json.load(f)
+              for pipeline, verdict in s.get('pipeline_verdicts', {}).items():
+                  record_eval_result(
+                      pipeline=pipeline,
+                      overall_score=s.get('pass_rate', 0),
+                      pass_rate=s.get('pass_rate', 0),
+                      verdict=verdict,
+                  )
+              print(f'Recorded eval results: {s.get(\"pipeline_verdicts\", {})}')
+          "
+
+      - name: Upload evaluation results
+        uses: actions/upload-artifact@v4
+        if: always() && env.HAS_LLM_CREDS == 'true'
+        with:
+          name: staging-eval-results
+          path: |
+            eval_results/
+            eval-output.log
+
+  staging-approval:
+    name: Staging Sign-off
+    runs-on: ubuntu-latest
+    environment: staging  # sign-off relies on this GitHub Environment being configured to require approval
+    needs: [deploy-staging, evaluate-staging]
+    if: ${{ always() && needs.deploy-staging.result == 'success' }}
+    steps:
+      - name: Staging validated
+        run: |
+          echo "✅ Staging validation complete."
+          echo "Tests: ${{ needs.deploy-staging.result }}"
+          echo "Eval: ${{ needs.evaluate-staging.result }}"
+          echo "Ready for production deployment."
diff --git a/.gitignore b/.gitignore
@@ -95,3 +95,41 @@ data_agent/eval_results/
 /enhanced_*
 /reprojected_*
 /features_enhanced_*
+
+# World model paper (private, not for sharing). The .tex/.docx entries are also matched by the docs/*paper* globs further down.
+docs/world_model_paper.tex
+docs/world_model_paper_en.docx
+docs/world_model_paper_cn.docx
+docs/fig_architecture.png
+docs/world-model-technical-report.md
+docs/world-model-technical-review*.md
+docs/reviewer_comments*.md
+scripts/generate_paper_docx.py
+scripts/generate_architecture_fig.py
+scripts/ablation_study.py
+scripts/ablation_results/
+
+# Benchmark generated data and results (paths are relative to the repository root)
+benchmarks/data/
+benchmarks/benchmark_results.json
+
+# Papers and client-facing documents — NEVER commit. Note: these globs match any docs/ file with "paper" in its name.
+docs/*paper*.tex
+docs/*paper*.docx
+docs/*paper*.pdf
+docs/*paper*.md
+docs/*paper*_cn.*
+docs/*paper*_en.*
+docs/*paper*_response*
+docs/generate_*_paper*
+docs/technical_paper_*
+docs/surveying_qc_demo_script.*
+docs/surveying_qc_agent_design.*
+
+# PDF and video files — NEVER commit (may contain sensitive data). The final "!" rule re-includes the user guide PDF; it only works while none of that file's parent directories are ignored.
+*.pdf
+*.mp4
+*.avi
+*.mov
+*.wmv
+!docs/dita/out/pdf-css-html5/data-agent-user-guide.pdf