Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ jobs:

- name: Install dependencies (matches Makefile exactly)
run: |
uv pip install --python $(which python) -r requirements.txt
uv pip install --python $(which python) -r requirements-dev.txt

- name: Mypy (strict) - matches Makefile exactly
run: |
Expand All @@ -86,7 +86,7 @@ jobs:

- name: Install dependencies
run: |
uv pip install --python $(which python) -r requirements.txt
uv pip install --python $(which python) -r requirements-dev.txt

- name: Run tests with coverage (matches Makefile exactly)
env:
Expand Down
3 changes: 1 addition & 2 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,7 @@ jobs:

- name: Install pre-commit and dependencies
run: |
uv pip install --python $(which python) pre-commit
uv pip install --python $(which python) -r requirements.txt
uv pip install --python $(which python) -r requirements-dev.txt

- name: Run pre-commit hooks (fastest feedback)
run: |
Expand Down
1 change: 1 addition & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ repos:
hooks:
- id: trailing-whitespace
name: Remove trailing whitespace
args: ["--markdown-linebreak-ext=md"]
- id: end-of-file-fixer
name: Add missing end-of-file
exclude: ^(alembic/versions/.*\.py)$
Expand Down
23 changes: 19 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,15 @@ install: ## Install dependencies
@echo "$(BLUE)Installing dependencies...$(NC)"
pip install -r requirements.txt

deps-check: ## Check dependency resolution (uv dry-run, matches CI)
@echo "$(BLUE)Checking dependency resolution with uv...$(NC)"
@if command -v uv >/dev/null 2>&1; then \
uv pip install --dry-run --python "$$(which python)" -r requirements-dev.txt >/dev/null && \
echo "$(GREEN)✓ Dependency resolution OK$(NC)"; \
else \
echo "$(YELLOW)WARN: uv not found; skipping deps-check (CI uses uv)$(NC)"; \
fi

format: ## Format code with ruff
@echo "$(BLUE)Formatting code with ruff...$(NC)"
ruff format .
Expand Down Expand Up @@ -101,16 +110,22 @@ ci-local: ## Run CI checks locally (same as GitHub Actions)
@echo "$(BLUE) Running CI Pipeline (Local)$(NC)"
@echo "$(BLUE)===========================================$(NC)"
@echo ""
@echo "$(YELLOW)Step 1/4: Format Check$(NC)"
@echo "$(YELLOW)Step 0/6: Dependency Resolution$(NC)"
@make deps-check
@echo ""
@echo "$(YELLOW)Step 1/6: Pre-commit (auto-fix)$(NC)"
@make pre-commit
@echo ""
@echo "$(YELLOW)Step 2/6: Format Check$(NC)"
@make format-check
@echo ""
@echo "$(YELLOW)Step 2/4: Lint$(NC)"
@echo "$(YELLOW)Step 3/6: Lint$(NC)"
@make lint
@echo ""
@echo "$(YELLOW)Step 3/4: Type Check$(NC)"
@echo "$(YELLOW)Step 4/6: Type Check$(NC)"
@make typecheck
@echo ""
@echo "$(YELLOW)Step 4/4: Tests$(NC)"
@echo "$(YELLOW)Step 5/6: Tests$(NC)"
@make test
@echo ""
@echo "$(GREEN)===========================================$(NC)"
Expand Down
112 changes: 112 additions & 0 deletions alembic/versions/202512131400_add_source_to_dedup_keys.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
"""Backfill cluster_key/dedup_key to include source_id.

P1.3 in docs/TECHNICAL_SPEC_EXTRACTION_QUALITY.md requires dedup keys to be
source-aware to avoid cross-source collisions.
"""

from __future__ import annotations

import hashlib
import json
from datetime import datetime
from typing import Any

import sqlalchemy as sa

from alembic import op

# revision identifiers, used by Alembic.
revision = "202512131400"  # id of this migration; matches the file-name prefix
down_revision = "202511060930"  # id of the revision this migration follows
branch_labels = None  # no branch labels are set for this revision
depends_on = None  # no explicit cross-revision dependencies are set


def _normalize_anchors(value: Any) -> list[str]:
    """Coerce a stored ``anchors`` value into a list of strings.

    ``value`` may be a list or a JSON-encoded string containing a list.
    ``None`` entries are dropped and every remaining entry is passed through
    ``str``. Anything else (``None``, other types, unparsable JSON, or JSON
    that does not decode to a list) yields an empty list.
    """
    candidate = value
    if isinstance(candidate, str):
        # Text input is expected to hold JSON; treat any decode failure as
        # "no anchors" rather than aborting the migration.
        try:
            candidate = json.loads(candidate)
        except Exception:  # noqa: BLE001
            return []
    if not isinstance(candidate, list):
        return []
    return [str(entry) for entry in candidate if entry is not None]


def _iso(dt: Any) -> str | None:
if dt is None:
return None
if isinstance(dt, datetime):
return dt.isoformat()
return str(dt)


def upgrade() -> None:
    """Recompute ``cluster_key`` and ``dedup_key`` for every ``events`` row.

    The cluster key is the SHA-1 of
    ``source_id||action||object_key||top_anchor`` and the dedup key is the
    SHA-1 of ``cluster_key||status||time||environment``. All new keys are
    computed first and then written back in one batched ``UPDATE``. Nothing
    is written when the table has no rows.
    """
    connection = op.get_bind()

    def _text(record: Any, column: str) -> str:
        # NULL/empty columns collapse to "" before whitespace is trimmed.
        return (record.get(column) or "").strip()

    selected = connection.execute(
        sa.text(
            """
            SELECT event_id, source_id, action, object_id, object_name_raw, anchors,
            status, actual_start, actual_end, planned_start, planned_end, environment
            FROM events
            """
        )
    ).mappings()

    time_columns = ("actual_start", "actual_end", "planned_start", "planned_end")

    updates: list[dict[str, Any]] = []
    for record in selected:
        # Rows without a source are attributed to "slack".
        source_id = (record.get("source_id") or "slack").strip()
        action = _text(record, "action")

        # Prefer the object id; fall back to the lower-cased raw name.
        object_key = _text(record, "object_id")
        if not object_key:
            object_key = (record.get("object_name_raw") or "").lower().strip()

        anchor_list = _normalize_anchors(record.get("anchors"))
        top_anchor = anchor_list[0] if anchor_list else ""

        cluster_material = f"{source_id}||{action}||{object_key}||{top_anchor}"
        cluster_key = hashlib.sha1(cluster_material.encode("utf-8")).hexdigest()

        # First truthy time column wins; if none is truthy the last column's
        # value is kept, mirroring an ``a or b or c or d`` chain.
        primary_time = None
        for column in time_columns:
            primary_time = record.get(column)
            if primary_time:
                break
        time_str = _iso(primary_time) or "no-time"

        status_val = _text(record, "status")
        env_val = _text(record, "environment")
        dedup_material = f"{cluster_key}||{status_val}||{time_str}||{env_val}"
        dedup_key = hashlib.sha1(dedup_material.encode("utf-8")).hexdigest()

        updates.append(
            {
                "event_id": record["event_id"],
                "cluster_key": cluster_key,
                "dedup_key": dedup_key,
            }
        )

    if updates:
        # A list of parameter dicts makes this a single batched statement.
        connection.execute(
            sa.text(
                """
                UPDATE events
                SET cluster_key = :cluster_key,
                dedup_key = :dedup_key
                WHERE event_id = :event_id
                """
            ),
            updates,
        )


def downgrade() -> None:
    """Do nothing: the previous keys cannot be restored, so rows keep their new keys."""
6 changes: 6 additions & 0 deletions config/defaults/main.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,12 @@ llm:
max_events_per_msg: 5
cache_ttl_days: 21 # Purge cached LLM responses after 21 days

# Extraction quality controls
extraction:
time_completion_enabled: true # Fill required event times from message timestamp
prompt_metadata_enabled: true # Include deterministic metadata block in user prompt
prompt_metadata_max_anchors: 10 # Limit anchors included in metadata

# Database Configuration
database:
path: data/slack_events.db
Expand Down
4 changes: 2 additions & 2 deletions config/prompts/telegram.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ system: |

1) security_incident:
- Hacks, exploits, breaches, theft of funds, critical vulnerabilities, compromised keys, bridge exploits.
- Examples (EN/RU): "hack", "exploit", "breach", "security incident", "funds stolen", "drained",
- Examples (EN/RU): "hack", "exploit", "breach", "security incident", "funds stolen", "drained",
"взлом", "эксплойт", "угнали средства", "слив приватных ключей", "бридж взломали", "протокол задрейнили".

2) competitor_update:
- Product / business changes of other wallets, exchanges, payment apps or custodial services that may compete with wallet.tg.
- Include: new features, supported assets, cards, P2P, staking, fees, geographies, integrations with Telegram or messengers.
- Treat as competitor any product that offers: custody of crypto/stablecoins, payments, P2P, on/off-ramp, cards, swaps,
- Treat as competitor any product that offers: custody of crypto/stablecoins, payments, P2P, on/off-ramp, cards, swaps,
especially if it has a Telegram bot or is used inside messengers.
- Examples (EN/RU): "launched a new wallet", "support USDT on Tron", "new crypto card", "P2P service",
"запустили кошелёк", "кошелёк в Telegram", "бот для криптоплатежей", "поддержка USDT", "запуск P2P", "новая карта".
Expand Down
18 changes: 18 additions & 0 deletions config/schemas/main.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,24 @@
"description": "Logging level"
}
}
},
"extraction": {
"type": "object",
"properties": {
"time_completion_enabled": {
"type": "boolean",
"description": "Fill required time fields from message timestamp when missing"
},
"prompt_metadata_enabled": {
"type": "boolean",
"description": "Include structured message metadata in the LLM prompt"
},
"prompt_metadata_max_anchors": {
"type": "integer",
"minimum": 0,
"description": "Maximum anchors to include in prompt metadata"
}
}
}
}
}
Loading