diff --git a/packages/backend/app/__init__.py b/packages/backend/app/__init__.py
index cdf76b45..a8e8eaaa 100644
--- a/packages/backend/app/__init__.py
+++ b/packages/backend/app/__init__.py
@@ -41,7 +41,10 @@ def create_app(settings: Settings | None = None) -> Flask:
     logger = logging.getLogger("finmind")
     logger.info("Starting FinMind backend with log level %s", log_level)
 
     # Extensions
+    # Bind the Redis client to this app's settings instead of the lazy default.
+    from .extensions import init_redis
+    init_redis(cfg.redis_url)
     db.init_app(app)
     jwt.init_app(app)
     app.extensions["observability"] = Observability()
diff --git a/packages/backend/app/db/schema.sql b/packages/backend/app/db/schema.sql
index 410189de..cba586f1 100644
--- a/packages/backend/app/db/schema.sql
+++ b/packages/backend/app/db/schema.sql
@@ -123,3 +123,18 @@ CREATE TABLE IF NOT EXISTS audit_logs (
   action VARCHAR(100) NOT NULL,
   created_at TIMESTAMP NOT NULL DEFAULT NOW()
 );
+
+
+-- GDPR Deletion Requests
+CREATE TABLE IF NOT EXISTS deletion_requests (
+  id SERIAL PRIMARY KEY,
+  user_id INT NOT NULL REFERENCES users(id) ON DELETE CASCADE,
+  requested_at TIMESTAMP NOT NULL DEFAULT NOW(),
+  scheduled_deletion_date TIMESTAMP NOT NULL,
+  ip_address VARCHAR(45),
+  user_agent VARCHAR(500),
+  cancelled BOOLEAN NOT NULL DEFAULT FALSE,
+  cancelled_at TIMESTAMP,
+  cancellation_ip VARCHAR(45)
+);
+CREATE INDEX IF NOT EXISTS idx_deletion_requests_user ON deletion_requests(user_id, cancelled);
diff --git a/packages/backend/app/extensions.py b/packages/backend/app/extensions.py
index bad98fae..114fedd8 100644
--- a/packages/backend/app/extensions.py
+++ b/packages/backend/app/extensions.py
@@ -1,11 +1,38 @@
 from flask_sqlalchemy import SQLAlchemy
 from flask_jwt_extended import JWTManager
 import redis
-from .config import Settings
 
 
 db = SQLAlchemy()
 jwt = JWTManager()
 
-_settings = Settings()
-redis_client = redis.Redis.from_url(_settings.redis_url, decode_responses=True)
+# Lazy-initialized Redis client
+_redis_client = None
+_redis_url = None
+
+
+def init_redis(redis_url: str):
+    """Initialize Redis client with the given URL."""
+    global _redis_client, _redis_url
+    _redis_url = redis_url
+    _redis_client = redis.Redis.from_url(redis_url, decode_responses=True)
+
+
+def get_redis():
+    """Get the Redis client, initializing with default if needed."""
+    global _redis_client
+    if _redis_client is None:
+        from .config import Settings
+        _settings = Settings()
+        init_redis(_settings.redis_url)
+    return _redis_client
+
+
+# For backward compatibility, expose redis_client as a property-like getter
+class RedisClientProxy:
+    """Proxy that delegates to the actual Redis client."""
+    def __getattr__(self, name):
+        return getattr(get_redis(), name)
+
+
+redis_client = RedisClientProxy()
diff --git a/packages/backend/app/models.py b/packages/backend/app/models.py
index 64d44810..729a226e 100644
--- a/packages/backend/app/models.py
+++ b/packages/backend/app/models.py
@@ -133,3 +133,17 @@ class AuditLog(db.Model):
     user_id = db.Column(db.Integer, db.ForeignKey("users.id"), nullable=True)
     action = db.Column(db.String(100), nullable=False)
     created_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=False)
+
+
+class DeletionRequest(db.Model):
+    """Track deletion requests for grace period management."""
+    __tablename__ = "deletion_requests"
+    id = db.Column(db.Integer, primary_key=True)
+    user_id = db.Column(db.Integer, db.ForeignKey("users.id", ondelete="CASCADE"), nullable=False)
+    requested_at = db.Column(db.DateTime, default=datetime.utcnow, nullable=False)
+    scheduled_deletion_date = db.Column(db.DateTime, nullable=False)
+    ip_address = db.Column(db.String(45), nullable=True)
+    user_agent = db.Column(db.String(500), nullable=True)
+    cancelled = db.Column(db.Boolean, default=False, nullable=False)
+    cancelled_at = db.Column(db.DateTime, nullable=True)
+    cancellation_ip = db.Column(db.String(45), nullable=True)
diff --git a/packages/backend/app/routes/__init__.py b/packages/backend/app/routes/__init__.py
index f13b0f89..288b9816 100644
--- a/packages/backend/app/routes/__init__.py
+++ b/packages/backend/app/routes/__init__.py
@@ -7,6 +7,7 @@
 from .categories import bp as categories_bp
 from .docs import bp as docs_bp
 from .dashboard import bp as dashboard_bp
+from .gdpr import bp as gdpr_bp
 
 
 def register_routes(app: Flask):
@@ -18,3 +19,4 @@ def register_routes(app: Flask):
     app.register_blueprint(categories_bp, url_prefix="/categories")
     app.register_blueprint(docs_bp, url_prefix="/docs")
     app.register_blueprint(dashboard_bp, url_prefix="/dashboard")
+    app.register_blueprint(gdpr_bp, url_prefix="/gdpr")
diff --git a/packages/backend/app/routes/gdpr.py b/packages/backend/app/routes/gdpr.py
new file mode 100644
index 00000000..c44e9368
--- /dev/null
+++ b/packages/backend/app/routes/gdpr.py
@@ -0,0 +1,352 @@
+from flask import Blueprint, request, jsonify, g, current_app
+from flask_jwt_extended import jwt_required, get_jwt_identity
+from werkzeug.security import generate_password_hash
+from datetime import datetime, timedelta
+from ..extensions import db
+from ..models import (
+    User, Category, Expense, RecurringExpense, Bill,
+    Reminder, AdImpression, UserSubscription, AuditLog, DeletionRequest
+)
+import logging
+import json
+import io
+import zipfile
+
+bp = Blueprint("gdpr", __name__)
+logger = logging.getLogger("finmind.gdpr")
+
+# Grace period in days before permanent deletion
+GRACE_PERIOD_DAYS = 30
+
+# Column widths of DeletionRequest.ip_address / user_agent; request metadata
+# is client-controlled, so it is truncated to fit before being stored.
+MAX_IP_LENGTH = 45
+MAX_USER_AGENT_LENGTH = 500
+
+
+def _log_audit(user_id: int, action: str, details: dict = None, ip_address: str = None, user_agent: str = None):
+    """Log an audit trail entry for GDPR actions.
+
+    Persists an ``AuditLog`` row (committing the current session, including
+    any other pending changes) and writes the full details to the logger.
+    """
+    audit = AuditLog(
+        user_id=user_id,
+        action=action,
+    )
+    db.session.add(audit)
+    db.session.commit()
+    log_details = {
+        "user_id": user_id,
+        "action": action,
+        "details": details or {},
+        "ip_address": ip_address,
+        "user_agent": user_agent,
+        "timestamp": datetime.utcnow().isoformat(),
+    }
+    logger.info("GDPR Audit: %s", json.dumps(log_details))
+    return audit
+
+
+def _get_request_metadata():
+    """Extract IP address and user agent from request.
+
+    Both values are truncated to the ``deletion_requests`` column widths.
+    NOTE: X-Forwarded-For is only trustworthy behind a proxy that
+    overwrites it; otherwise the client can spoof the recorded address.
+    """
+    forwarded_for = request.headers.get("X-Forwarded-For")
+    if forwarded_for:
+        ip_address = forwarded_for.split(",")[0].strip()
+    else:
+        ip_address = request.remote_addr
+    if ip_address:
+        ip_address = ip_address[:MAX_IP_LENGTH]
+    user_agent = request.headers.get("User-Agent", "Unknown")[:MAX_USER_AGENT_LENGTH]
+    return ip_address, user_agent
+
+
+def _collect_user_pii(user_id: int) -> dict:
+    """Collect all PII for a user across all tables.
+
+    Returns ``None`` when the user does not exist.
+    """
+    user = db.session.get(User, user_id)
+    if not user:
+        return None
+
+    data = {
+        "export_timestamp": datetime.utcnow().isoformat(),
+        "user": {
+            "id": user.id,
+            "email": user.email,
+            "preferred_currency": user.preferred_currency,
+            "role": user.role,
+            "created_at": user.created_at.isoformat() if user.created_at else None,
+        },
+        "categories": [],
+        "expenses": [],
+        "recurring_expenses": [],
+        "bills": [],
+        "reminders": [],
+        "ad_impressions": [],
+        "subscriptions": [],
+    }
+
+    for cat in db.session.query(Category).filter_by(user_id=user_id).all():
+        data["categories"].append({
+            "id": cat.id, "name": cat.name,
+            "created_at": cat.created_at.isoformat() if cat.created_at else None,
+        })
+
+    for exp in db.session.query(Expense).filter_by(user_id=user_id).all():
+        data["expenses"].append({
+            "id": exp.id, "category_id": exp.category_id,
+            "amount": str(exp.amount), "currency": exp.currency,
+            "expense_type": exp.expense_type, "notes": exp.notes,
+            "spent_at": exp.spent_at.isoformat() if exp.spent_at else None,
+            "source_recurring_id": exp.source_recurring_id,
+            "created_at": exp.created_at.isoformat() if exp.created_at else None,
+        })
+
+    for rec in db.session.query(RecurringExpense).filter_by(user_id=user_id).all():
+        data["recurring_expenses"].append({
+            "id": rec.id, "category_id": rec.category_id,
+            "amount": str(rec.amount), "currency": rec.currency,
+            "expense_type": rec.expense_type, "notes": rec.notes,
+            "cadence": rec.cadence.value if rec.cadence else None,
+            "start_date": rec.start_date.isoformat() if rec.start_date else None,
+            "end_date": rec.end_date.isoformat() if rec.end_date else None,
+            "active": rec.active,
+            "created_at": rec.created_at.isoformat() if rec.created_at else None,
+        })
+
+    for bill in db.session.query(Bill).filter_by(user_id=user_id).all():
+        data["bills"].append({
+            "id": bill.id, "name": bill.name, "amount": str(bill.amount),
+            "currency": bill.currency,
+            "next_due_date": bill.next_due_date.isoformat() if bill.next_due_date else None,
+            "cadence": bill.cadence.value if bill.cadence else None,
+            "autopay_enabled": bill.autopay_enabled,
+            "channel_whatsapp": bill.channel_whatsapp, "channel_email": bill.channel_email,
+            "active": bill.active,
+            "created_at": bill.created_at.isoformat() if bill.created_at else None,
+        })
+
+    for rem in db.session.query(Reminder).filter_by(user_id=user_id).all():
+        data["reminders"].append({
+            "id": rem.id, "bill_id": rem.bill_id, "message": rem.message,
+            "send_at": rem.send_at.isoformat() if rem.send_at else None,
+            "sent": rem.sent, "channel": rem.channel,
+        })
+
+    for ad in db.session.query(AdImpression).filter_by(user_id=user_id).all():
+        data["ad_impressions"].append({
+            "id": ad.id, "placement": ad.placement,
+            "created_at": ad.created_at.isoformat() if ad.created_at else None,
+        })
+
+    for sub in db.session.query(UserSubscription).filter_by(user_id=user_id).all():
+        data["subscriptions"].append({
+            "id": sub.id, "plan_id": sub.plan_id, "active": sub.active,
+            "started_at": sub.started_at.isoformat() if sub.started_at else None,
+        })
+
+    return data
+
+
+@bp.get("/export")
+@jwt_required()
+def export_user_data():
+    """Export all user data in GDPR-compliant format. Returns a ZIP file."""
+    uid = int(get_jwt_identity())
+    ip_address, user_agent = _get_request_metadata()
+
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    data = _collect_user_pii(uid)
+    if not data:
+        return jsonify(error="failed to collect user data"), 500
+
+    _log_audit(
+        user_id=uid,
+        action="GDPR_EXPORT",
+        details={"format": "json", "records_exported": sum([
+            len(data["categories"]), len(data["expenses"]),
+            len(data["recurring_expenses"]), len(data["bills"]),
+            len(data["reminders"]), len(data["ad_impressions"]),
+            len(data["subscriptions"]),
+        ])},
+        ip_address=ip_address,
+        user_agent=user_agent,
+    )
+
+    memory_file = io.BytesIO()
+    with zipfile.ZipFile(memory_file, "w", zipfile.ZIP_DEFLATED) as zf:
+        export_json = json.dumps(data, indent=2, default=str)
+        timestamp = datetime.utcnow().strftime("%Y%m%d_%H%M%S")
+        zf.writestr(f"user_data_{uid}_{timestamp}.json", export_json)
+        readme = f"""FinMind GDPR Data Export
+========================
+User ID: {uid}
+Export Date: {data["export_timestamp"]}
+This archive contains all personal data stored by FinMind.
+Password hashes are not included for security reasons.
+To request deletion, use DELETE /gdpr/delete endpoint.
+"""
+        zf.writestr("README.txt", readme)
+
+    memory_file.seek(0)
+    logger.info("GDPR export completed for user_id=%s", uid)
+
+    return current_app.response_class(
+        memory_file.getvalue(),
+        mimetype="application/zip",
+        headers={"Content-Disposition": f"attachment; filename=finmind_export_{uid}_{timestamp}.zip"}
+    )
+
+
+@bp.post("/delete-request")
+@jwt_required()
+def request_deletion():
+    """Request account deletion. Starts the grace period countdown."""
+    uid = int(get_jwt_identity())
+    ip_address, user_agent = _get_request_metadata()
+
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    existing = db.session.query(DeletionRequest).filter_by(user_id=uid, cancelled=False).first()
+    if existing:
+        days_left = max(0, (existing.scheduled_deletion_date - datetime.utcnow()).days)
+        return jsonify(
+            error="deletion already requested",
+            scheduled_deletion_date=existing.scheduled_deletion_date.isoformat(),
+            days_remaining=days_left,
+        ), 409
+
+    scheduled_date = datetime.utcnow() + timedelta(days=GRACE_PERIOD_DAYS)
+    deletion_request = DeletionRequest(
+        user_id=uid,
+        requested_at=datetime.utcnow(),
+        scheduled_deletion_date=scheduled_date,
+        ip_address=ip_address,
+        user_agent=user_agent,
+    )
+    db.session.add(deletion_request)
+
+    _log_audit(
+        user_id=uid,
+        action="GDPR_DELETE_REQUEST",
+        details={"grace_period_days": GRACE_PERIOD_DAYS, "scheduled_date": scheduled_date.isoformat()},
+        ip_address=ip_address,
+        user_agent=user_agent,
+    )
+
+    db.session.commit()
+    logger.info("GDPR deletion requested for user_id=%s, scheduled for %s", uid, scheduled_date)
+
+    return jsonify(
+        message="deletion request received",
+        scheduled_deletion_date=scheduled_date.isoformat(),
+        grace_period_days=GRACE_PERIOD_DAYS,
+        cancellation_endpoint="/gdpr/delete-request",
+    ), 202
+
+
+@bp.delete("/delete-request")
+@jwt_required()
+def cancel_deletion_request():
+    """Cancel a pending deletion request during the grace period."""
+    uid = int(get_jwt_identity())
+    ip_address, user_agent = _get_request_metadata()
+
+    deletion_request = db.session.query(DeletionRequest).filter_by(user_id=uid, cancelled=False).first()
+    if not deletion_request:
+        return jsonify(error="no pending deletion request"), 404
+
+    deletion_request.cancelled = True
+    deletion_request.cancelled_at = datetime.utcnow()
+    deletion_request.cancellation_ip = ip_address
+
+    _log_audit(
+        user_id=uid,
+        action="GDPR_DELETE_CANCEL",
+        details={"original_scheduled_date": deletion_request.scheduled_deletion_date.isoformat()},
+        ip_address=ip_address,
+        user_agent=user_agent,
+    )
+
+    db.session.commit()
+    logger.info("GDPR deletion cancelled for user_id=%s", uid)
+
+    return jsonify(message="deletion request cancelled"), 200
+
+
+@bp.delete("/delete")
+@jwt_required()
+def confirm_deletion():
+    """Immediately and irreversibly delete the caller's account and data.
+
+    Requires a JSON body of ``{"confirm": "DELETE_MY_ACCOUNT"}``.
+    """
+    uid = int(get_jwt_identity())
+
+    # silent=True: a missing or non-JSON body must produce the 400 below
+    # instead of an error raised by the JSON parsing itself.
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict) or data.get("confirm") != "DELETE_MY_ACCOUNT":
+        return jsonify(error="confirmation required", hint="send confirm=DELETE_MY_ACCOUNT"), 400
+
+    user = db.session.get(User, uid)
+    if not user:
+        return jsonify(error="user not found"), 404
+
+    # The email is deliberately left out: logging it would retain PII after erasure.
+    audit_details = {"user_id_original": uid, "immediate_deletion": True, "records_deleted": {}}
+
+    try:
+        audit_details["records_deleted"]["subscriptions"] = db.session.query(UserSubscription).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["reminders"] = db.session.query(Reminder).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["bills"] = db.session.query(Bill).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["expenses"] = db.session.query(Expense).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["recurring_expenses"] = db.session.query(RecurringExpense).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["categories"] = db.session.query(Category).filter_by(user_id=uid).delete()
+        audit_details["records_deleted"]["ad_impressions_anonymized"] = db.session.query(AdImpression).filter_by(user_id=uid).update({"user_id": None})
+        # Detach earlier audit rows (nullable FK to users) so they cannot block
+        # the user delete on databases that enforce the constraint.
+        db.session.query(AuditLog).filter_by(user_id=uid).update({"user_id": None})
+        db.session.query(DeletionRequest).filter_by(user_id=uid).delete()
+        db.session.delete(user)
+        final_audit = AuditLog(user_id=None, action="GDPR_DELETE_CONFIRMED")
+        db.session.add(final_audit)
+        db.session.commit()
+    except Exception:
+        db.session.rollback()
+        logger.exception("GDPR deletion failed for user_id=%s", uid)
+        return jsonify(error="deletion failed"), 500
+
+    logger.info("GDPR IRREVERSIBLE DELETE completed: %s", json.dumps(audit_details))
+    return jsonify(message="account permanently deleted", details=audit_details["records_deleted"]), 200
+
+
+@bp.get("/status")
+@jwt_required()
+def deletion_status():
+    """Report whether the caller has a pending (non-cancelled) deletion request."""
+    uid = int(get_jwt_identity())
+    deletion_request = db.session.query(DeletionRequest).filter_by(user_id=uid, cancelled=False).first()
+    if not deletion_request:
+        return jsonify(has_pending_deletion=False, message="no pending deletion request"), 200
+    days_remaining = (deletion_request.scheduled_deletion_date - datetime.utcnow()).days
+    return jsonify(
+        has_pending_deletion=True,
+        requested_at=deletion_request.requested_at.isoformat(),
+        scheduled_deletion_date=deletion_request.scheduled_deletion_date.isoformat(),
+        days_remaining=max(0, days_remaining),
+        grace_period_days=GRACE_PERIOD_DAYS,
+        can_cancel=True,
+    ), 200
diff --git a/packages/backend/tests/test_gdpr.py b/packages/backend/tests/test_gdpr.py
new file mode 100644
index 00000000..854b738c
--- /dev/null
+++ b/packages/backend/tests/test_gdpr.py
@@ -0,0 +1,144 @@
+import pytest
+import os
+import fakeredis
+from app import create_app
+from app.config import Settings
+from app.extensions import db, init_redis
+from app import models
+
+
+# Not named Test* so that pytest does not try to collect it as a test class.
+class GdprTestSettings(Settings):
+    database_url: str = "sqlite+pysqlite:///:memory:"
+    redis_url: str = "redis://localhost:6379/15"
+    jwt_secret: str = "test-secret-with-32-plus-chars"
+
+
+@pytest.fixture()
+def gdpr_app():
+    os.environ.setdefault("FLASK_ENV", "testing")
+
+    # Use fakeredis for testing
+    fake_redis = fakeredis.FakeRedis(decode_responses=True)
+
+    settings = GdprTestSettings(
+        database_url="sqlite+pysqlite:///:memory:",
+        redis_url="redis://localhost:6379/15",
+        jwt_secret="test-secret-with-32-plus-chars-1234567890",
+    )
+
+    app = create_app(settings)
+    app.config.update(TESTING=True)
+
+    # Override Redis with fake instance
+    import app.extensions as ext
+    ext._redis_client = fake_redis
+
+    with app.app_context():
+        db.create_all()
+    yield app
+    with app.app_context():
+        db.session.remove()
+        db.drop_all()
+
+
+@pytest.fixture()
+def gdpr_client(gdpr_app):
+    return gdpr_app.test_client()
+
+
+@pytest.fixture()
+def gdpr_auth_header(gdpr_client):
+    email = "gdpr_test@example.com"
+    password = "password123"
+    gdpr_client.post("/auth/register", json={"email": email, "password": password})
+    r = gdpr_client.post("/auth/login", json={"email": email, "password": password})
+    access = r.get_json()["access_token"]
+    return {"Authorization": f"Bearer {access}"}
+
+
+def test_gdpr_export_returns_zip(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.get("/gdpr/export", headers=gdpr_auth_header)
+    assert r.status_code == 200
+    assert r.content_type == "application/zip"
+    assert len(r.data) > 0
+
+
+def test_gdpr_delete_request_creates_request(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.post("/gdpr/delete-request", headers=gdpr_auth_header)
+    assert r.status_code == 202
+    data = r.get_json()
+    assert "scheduled_deletion_date" in data
+    assert data["grace_period_days"] == 30
+
+
+def test_gdpr_delete_request_rejects_duplicate(gdpr_client, gdpr_auth_header):
+    r1 = gdpr_client.post("/gdpr/delete-request", headers=gdpr_auth_header)
+    assert r1.status_code == 202
+    r2 = gdpr_client.post("/gdpr/delete-request", headers=gdpr_auth_header)
+    assert r2.status_code == 409
+
+
+def test_gdpr_cancel_delete_request(gdpr_client, gdpr_auth_header):
+    gdpr_client.post("/gdpr/delete-request", headers=gdpr_auth_header)
+    r = gdpr_client.delete("/gdpr/delete-request", headers=gdpr_auth_header)
+    assert r.status_code == 200
+    assert r.get_json()["message"] == "deletion request cancelled"
+
+
+def test_gdpr_status_no_pending(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.get("/gdpr/status", headers=gdpr_auth_header)
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["has_pending_deletion"] is False
+
+
+def test_gdpr_status_with_pending(gdpr_client, gdpr_auth_header):
+    gdpr_client.post("/gdpr/delete-request", headers=gdpr_auth_header)
+    r = gdpr_client.get("/gdpr/status", headers=gdpr_auth_header)
+    assert r.status_code == 200
+    data = r.get_json()
+    assert data["has_pending_deletion"] is True
+    assert "days_remaining" in data
+
+
+def test_gdpr_immediate_delete_requires_confirmation(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.delete("/gdpr/delete", headers=gdpr_auth_header, json={})
+    assert r.status_code == 400
+    assert "confirmation required" in r.get_json()["error"]
+
+
+def test_gdpr_immediate_delete_without_body(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.delete("/gdpr/delete", headers=gdpr_auth_header)
+    assert r.status_code == 400
+
+
+def test_gdpr_immediate_delete_with_confirmation(gdpr_client, gdpr_auth_header):
+    r = gdpr_client.delete("/gdpr/delete", headers=gdpr_auth_header, json={"confirm": "DELETE_MY_ACCOUNT"})
+    assert r.status_code == 200
+    data = r.get_json()
+    assert "account permanently deleted" in data["message"]
+    assert "details" in data
+
+
+def test_gdpr_delete_removes_user(gdpr_client):
+    email = "delete_test@example.com"
+    password = "password123"
+    gdpr_client.post("/auth/register", json={"email": email, "password": password})
+    r = gdpr_client.post("/auth/login", json={"email": email, "password": password})
+    access = r.get_json()["access_token"]
+    auth = {"Authorization": f"Bearer {access}"}
+
+    r = gdpr_client.delete("/gdpr/delete", headers=auth, json={"confirm": "DELETE_MY_ACCOUNT"})
+    assert r.status_code == 200
+
+    # Verify user can no longer login
+    r = gdpr_client.post("/auth/login", json={"email": email, "password": password})
+    assert r.status_code == 401
+
+
+def test_gdpr_delete_after_export(gdpr_client, gdpr_auth_header):
+    # The export writes an audit row for this user before the deletion runs.
+    assert gdpr_client.get("/gdpr/export", headers=gdpr_auth_header).status_code == 200
+    r = gdpr_client.delete("/gdpr/delete", headers=gdpr_auth_header, json={"confirm": "DELETE_MY_ACCOUNT"})
+    assert r.status_code == 200